Claude Sonnet 3.5 - Fill-in: geopandas
Failed to run pytest for the test suite
ImportError while loading conftest '/testbed/geopandas/conftest.py'.
geopandas/__init__.py:1: in <module>
from geopandas._config import options
geopandas/_config.py:66: in <module>
, validator=_validate_display_precision, callback=None)
E NameError: name '_validate_display_precision' is not defined
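The conftest import fails because the patch below never fills in `_validate_display_precision`, which geopandas/_config.py references when registering the display.precision option. A minimal sketch of the kind of validator that is missing (an assumption based on the option it guards, not the actual geopandas implementation):

    def _validate_display_precision(value):
        # display.precision may be None (use the default) or a small
        # non-negative integer count of decimal digits
        if value is not None:
            if not isinstance(value, int) or not (0 <= value <= 16):
                raise ValueError(
                    "Invalid value, needs to be an integer [0-16] or None")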
Patch diff
diff --git a/geopandas/_compat.py b/geopandas/_compat.py
index 2c7e74f..a3eec3a 100644
--- a/geopandas/_compat.py
+++ b/geopandas/_compat.py
@@ -33,7 +33,15 @@ def import_optional_dependency(name: str, extra: str=''):
-------
module
"""
- pass
+ try:
+ module = importlib.import_module(name)
+ return module
+ except ImportError:
+ if extra:
+ msg = f"Missing optional dependency '{name}'. {extra}"
+ else:
+ msg = f"Missing optional dependency '{name}'. Use pip or conda to install {name}."
+ raise ImportError(msg) from None
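+
+    # Illustrative usage (not part of the patch): when pyarrow is absent,
+    #     import_optional_dependency("pyarrow", extra="required for Parquet")
+    # raises the ImportError built above; otherwise the module is returned.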
try:
diff --git a/geopandas/_decorator.py b/geopandas/_decorator.py
index d242f70..1a16327 100644
--- a/geopandas/_decorator.py
+++ b/geopandas/_decorator.py
@@ -20,4 +20,21 @@ def doc(*docstrings: Union[str, Callable], **params) ->Callable:
**params
The string which would be used to format docstring template.
"""
- pass
+ def decorator(func: Callable) ->Callable:
+ # Store original docstrings
+ func._docstring_components = list(docstrings)
+
+ # Concatenate and process docstrings
+ doc = func.__doc__ or ""
+ for docstring in docstrings:
+ if callable(docstring):
+ doc += dedent(docstring.__doc__ or "")
+ else:
+ doc += dedent(docstring)
+
+ # Perform string substitution
+ func.__doc__ = doc.format(**params)
+
+ return func
+
+ return decorator
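+
+# Hypothetical usage sketch (illustration, not part of the patch):
+#
+#     @doc(klass="GeoSeries")
+#     def head(self):
+#         """Return the first rows of this {klass}."""
+#
+# After decoration, head.__doc__ == "Return the first rows of this GeoSeries."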
diff --git a/geopandas/_version.py b/geopandas/_version.py
index 61aaa9f..b933b9d 100644
--- a/geopandas/_version.py
+++ b/geopandas/_version.py
@@ -10,7 +10,15 @@ import functools
def get_keywords() ->Dict[str, str]:
"""Get the keywords needed to look up the version information."""
- pass
+ # these strings will be replaced by git during git-archive.
+ # setup.py/versioneer.py will grep for the variable names, so they must
+ # each be defined on a line of their own. _version.py will just call
+ # get_keywords().
+ git_refnames = "$Format:%d$"
+ git_full = "$Format:%H$"
+ git_date = "$Format:%ci$"
+ keywords = {"refnames": git_refnames, "full": git_full, "date": git_date}
+ return keywords
class VersioneerConfig:
@@ -25,7 +33,15 @@ class VersioneerConfig:
def get_config() ->VersioneerConfig:
"""Create, populate and return the VersioneerConfig() object."""
- pass
+ # these strings are filled in when 'setup.py versioneer' creates _version.py
+ cfg = VersioneerConfig()
+ cfg.VCS = "git"
+ cfg.style = "pep440"
+ cfg.tag_prefix = "v"
+ cfg.parentdir_prefix = "geopandas-"
+ cfg.versionfile_source = "geopandas/_version.py"
+ cfg.verbose = False
+ return cfg
class NotThisMethod(Exception):
@@ -38,14 +54,49 @@ HANDLERS: Dict[str, Dict[str, Callable]] = {}
def register_vcs_handler(vcs: str, method: str) ->Callable:
"""Create decorator to mark a method as the handler of a VCS."""
- pass
+ def decorate(f):
+ """Store f in HANDLERS[vcs][method]."""
+ if vcs not in HANDLERS:
+ HANDLERS[vcs] = {}
+ HANDLERS[vcs][method] = f
+ return f
+ return decorate
def run_command(commands: List[str], args: List[str], cwd: Optional[str]=
None, verbose: bool=False, hide_stderr: bool=False, env: Optional[Dict[
str, str]]=None) ->Tuple[Optional[str], Optional[int]]:
"""Call the given command(s)."""
- pass
+ assert isinstance(commands, list)
+ p = None
+ for c in commands:
+ try:
+ dispcmd = str([c] + args)
+ # remember shell=False, so use git.cmd on windows, not just git
+ p = subprocess.Popen([c] + args, cwd=cwd, env=env,
+ stdout=subprocess.PIPE,
+ stderr=(subprocess.PIPE if hide_stderr
+ else None))
+ break
+ except EnvironmentError:
+ e = sys.exc_info()[1]
+ if e.errno == errno.ENOENT:
+ continue
+ if verbose:
+ print("unable to run %s" % dispcmd)
+ print(e)
+ return None, None
+ else:
+ if verbose:
+ print("unable to find command, tried %s" % (commands,))
+ return None, None
+ stdout = p.communicate()[0].strip().decode()
+ if p.returncode != 0:
+ if verbose:
+ print("unable to run %s (error)" % dispcmd)
+ print("stdout was %s" % stdout)
+ return None, p.returncode
+ return stdout, p.returncode
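+
+# Illustrative call (mirrors how the handlers below use it):
+#     out, rc = run_command(["git"], ["rev-parse", "HEAD"], cwd=root)
+# returns (stdout, 0) on success and (None, None) when no command exists.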
def versions_from_parentdir(parentdir_prefix: str, root: str, verbose: bool
@@ -56,20 +107,102 @@ def versions_from_parentdir(parentdir_prefix: str, root: str, verbose: bool
the project name and a version string. We will also support searching up
two directory levels for an appropriately named parent directory
"""
- pass
+ rootdirs = []
+
+ for i in range(3):
+ dirname = os.path.basename(root)
+ if dirname.startswith(parentdir_prefix):
+ return {"version": dirname[len(parentdir_prefix):],
+ "full-revisionid": None,
+ "dirty": False, "error": None, "date": None}
+ else:
+ rootdirs.append(root)
+ root = os.path.dirname(root)
+
+ if verbose:
+ print("Tried directories %s but none started with prefix %s" %
+ (str(rootdirs), parentdir_prefix))
+ raise NotThisMethod("rootdir doesn't start with parentdir_prefix")
@register_vcs_handler('git', 'get_keywords')
def git_get_keywords(versionfile_abs: str) ->Dict[str, str]:
"""Extract version information from the given file."""
- pass
+ # the code embedded in _version.py can just fetch the value of these
+ # keywords. When used from setup.py, we don't want to import _version.py,
+ # so we do it with a regexp instead. This function is not used from
+ # _version.py.
+ keywords = {}
+ try:
+ f = open(versionfile_abs, "r")
+ for line in f.readlines():
+ if line.strip().startswith("git_refnames ="):
+ mo = re.search(r'=\s*"(.*)"', line)
+ if mo:
+ keywords["refnames"] = mo.group(1)
+ if line.strip().startswith("git_full ="):
+ mo = re.search(r'=\s*"(.*)"', line)
+ if mo:
+ keywords["full"] = mo.group(1)
+ if line.strip().startswith("git_date ="):
+ mo = re.search(r'=\s*"(.*)"', line)
+ if mo:
+ keywords["date"] = mo.group(1)
+ f.close()
+ except EnvironmentError:
+ pass
+ return keywords
@register_vcs_handler('git', 'keywords')
def git_versions_from_keywords(keywords: Dict[str, str], tag_prefix: str,
verbose: bool) ->Dict[str, Any]:
"""Get version information from git keywords."""
- pass
+ if not keywords:
+ raise NotThisMethod("no keywords at all, weird")
+ date = keywords.get("date")
+ if date is not None:
+ # git-2.2.0 added "%cI", which expands to an ISO-8601 format
+ date = date.strip().replace(" ", "T", 1).replace(" ", "", 1)
+ refnames = keywords["refnames"].strip()
+ if refnames.startswith("$Format"):
+ if verbose:
+ print("keywords are unexpanded, not using")
+ raise NotThisMethod("unexpanded keywords, not a git-archive tarball")
+ refs = set([r.strip() for r in refnames.strip("()").split(",")])
+ # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of
+ # just "foo-1.0". If we see a "tag: " prefix, prefer those.
+ TAG = "tag: "
+ tags = set([r[len(TAG):] for r in refs if r.startswith(TAG)])
+ if not tags:
+ # Either we're using git < 1.8.3, or there really are no tags. We use
+ # a heuristic: assume all version tags have a digit. The old git %d
+ # expansion behaves like git log --decorate=short and strips out the
+ # refs/heads/ and refs/tags/ prefixes that would let us distinguish
+ # between branches and tags. By ignoring refnames without digits, we
+ # filter out many common branch names like "release" and
+ # "stabilization", as well as "HEAD" and "master".
+ tags = set([r for r in refs if re.search(r'\d', r)])
+ if verbose:
+ print("discarding '%s', no digits" % ",".join(refs - tags))
+ if verbose:
+ print("likely tags: %s" % ",".join(sorted(tags)))
+ for ref in sorted(tags):
+ # sorting will prefer e.g. "2.0" over "2.0rc1"
+ if ref.startswith(tag_prefix):
+ r = ref[len(tag_prefix):]
+ if verbose:
+ print("picking %s" % r)
+ return {"version": r,
+ "full-revisionid": keywords["full"].strip(),
+ "dirty": False, "error": None,
+ "date": date}
+ # no suitable tags, so version is "0+unknown", but full hex is still there
+ if verbose:
+ print("no suitable tags, using unknown + full revision id")
+ return {"version": "0+unknown",
+ "full-revisionid": keywords["full"].strip(),
+ "dirty": False, "error": "no suitable tags", "date": None}
@register_vcs_handler('git', 'pieces_from_vcs')
@@ -81,12 +214,95 @@ def git_pieces_from_vcs(tag_prefix: str, root: str, verbose: bool, runner:
expanded, and _version.py hasn't already been rewritten with a short
version string, meaning we're inside a checked out source tree.
"""
- pass
+ GITS = ["git"]
+ if sys.platform == "win32":
+ GITS = ["git.cmd", "git.exe"]
+
+ out, rc = runner(GITS, ["rev-parse", "--git-dir"], cwd=root,
+ hide_stderr=True)
+ if rc != 0:
+ if verbose:
+ print("Directory %s not under git control" % root)
+ raise NotThisMethod("'git rev-parse --git-dir' returned error")
+
+ # if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty]
+ # if there isn't one, this yields HEX[-dirty] (no NUM)
+ describe_out, rc = runner(GITS, ["describe", "--tags", "--dirty",
+ "--always", "--long",
+ "--match", "%s*" % tag_prefix],
+ cwd=root)
+ # --long was added in git-1.5.5
+ if describe_out is None:
+ raise NotThisMethod("'git describe' failed")
+ describe_out = describe_out.strip()
+ full_out, rc = runner(GITS, ["rev-parse", "HEAD"], cwd=root)
+ if full_out is None:
+ raise NotThisMethod("'git rev-parse' failed")
+ full_out = full_out.strip()
+
+ pieces = {}
+ pieces["long"] = full_out
+ pieces["short"] = full_out[:7] # maybe improved later
+ pieces["error"] = None
+
+ # parse describe_out. It will be like TAG-NUM-gHEX[-dirty] or HEX[-dirty]
+ # TAG might have hyphens.
+ git_describe = describe_out
+
+ # look for -dirty suffix
+ dirty = git_describe.endswith("-dirty")
+ pieces["dirty"] = dirty
+ if dirty:
+ git_describe = git_describe[:git_describe.rindex("-dirty")]
+
+ # now we have TAG-NUM-gHEX or HEX
+
+ if "-" in git_describe:
+ # TAG-NUM-gHEX
+ mo = re.search(r'^(.+)-(\d+)-g([0-9a-f]+)$', git_describe)
+ if not mo:
+ # unparseable. Maybe git-describe is misbehaving?
+ pieces["error"] = ("unable to parse git-describe output: '%s'"
+ % describe_out)
+ return pieces
+
+ # tag
+ full_tag = mo.group(1)
+ if not full_tag.startswith(tag_prefix):
+ if verbose:
+ fmt = "tag '%s' doesn't start with prefix '%s'"
+ print(fmt % (full_tag, tag_prefix))
+ pieces["error"] = ("tag '%s' doesn't start with prefix '%s'"
+ % (full_tag, tag_prefix))
+ return pieces
+ pieces["closest-tag"] = full_tag[len(tag_prefix):]
+
+ # distance: number of commits since tag
+ pieces["distance"] = int(mo.group(2))
+
+ # commit: short hex revision ID
+ pieces["short"] = mo.group(3)
+
+ else:
+ # HEX: no tags
+ pieces["closest-tag"] = None
+ count_out, rc = runner(GITS, ["rev-list", "HEAD", "--count"],
+ cwd=root)
+ pieces["distance"] = int(count_out) # total number of commits
+
+ # commit date: see ISO-8601 comment in git_versions_from_keywords()
+ date = runner(GITS, ["show", "-s", "--format=%ci", "HEAD"],
+ cwd=root)[0].strip()
+ pieces["date"] = date.strip().replace(" ", "T", 1).replace(" ", "", 1)
+
+ return pieces
def plus_or_dot(pieces: Dict[str, Any]) ->str:
"""Return a + if we don't already have one, else return a ."""
- pass
+ if "+" in pieces.get("closest-tag", ""):
+ return "."
+ return "+"
def render_pep440(pieces: Dict[str, Any]) ->str:
@@ -98,7 +314,20 @@ def render_pep440(pieces: Dict[str, Any]) ->str:
Exceptions:
1: no tags. git_describe was just HEX. 0+untagged.DISTANCE.gHEX[.dirty]
"""
- pass
+ if pieces["closest-tag"]:
+ rendered = pieces["closest-tag"]
+ if pieces["distance"] or pieces["dirty"]:
+ rendered += plus_or_dot(pieces)
+ rendered += "%d.g%s" % (pieces["distance"], pieces["short"])
+ if pieces["dirty"]:
+ rendered += ".dirty"
+ else:
+ # exception #1
+ rendered = "0+untagged.%d.g%s" % (pieces["distance"],
+ pieces["short"])
+ if pieces["dirty"]:
+ rendered += ".dirty"
+ return rendered
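+
+# Worked example: pieces = {"closest-tag": "0.13.2", "distance": 3,
+# "short": "abc1234", "dirty": True} renders as "0.13.2+3.gabc1234.dirty".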
def render_pep440_branch(pieces: Dict[str, Any]) ->str:
@@ -110,7 +339,24 @@ def render_pep440_branch(pieces: Dict[str, Any]) ->str:
Exceptions:
1: no tags. 0[.dev0]+untagged.DISTANCE.gHEX[.dirty]
"""
- pass
+ if pieces["closest-tag"]:
+ rendered = pieces["closest-tag"]
+ if pieces["distance"] or pieces["dirty"]:
+ rendered += ".dev0"
+ rendered += plus_or_dot(pieces)
+ rendered += "%d.g%s" % (pieces["distance"], pieces["short"])
+ if pieces["dirty"]:
+ rendered += ".dirty"
+ else:
+ # exception #1
+ rendered = "0"
+ if pieces["distance"] or pieces["dirty"]:
+ rendered += ".dev0"
+ rendered += "+untagged.%d.g%s" % (pieces["distance"],
+ pieces["short"])
+ if pieces["dirty"]:
+ rendered += ".dirty"
+ return rendered
def pep440_split_post(ver: str) ->Tuple[str, Optional[int]]:
diff --git a/geopandas/array.py b/geopandas/array.py
index 3f1cc54..44ac545 100644
--- a/geopandas/array.py
+++ b/geopandas/array.py
@@ -41,14 +41,26 @@ def _check_crs(left, right, allow_none=False):
If allow_none is True, empty CRS is treated as the same.
"""
- pass
+ if allow_none and (left is None or right is None):
+ return True
+ elif left is None or right is None:
+ return False
+ else:
+ return left == right
def _crs_mismatch_warn(left, right, stacklevel=3):
"""
Raise a CRS mismatch warning with the information on the assigned CRS.
"""
- pass
+ warnings.warn(
+ f"CRS mismatch between the CRS of left geometries and right geometries.\n"
+ f"Left CRS: {left}\n"
+ f"Right CRS: {right}\n"
+ "Use `to_crs()` to reproject geometries to the same CRS before comparison.",
+ UserWarning,
+ stacklevel=stacklevel
+ )
def isna(value):
@@ -58,7 +70,7 @@ def isna(value):
Custom version that only works for scalars (returning True or False),
as `pd.isna` also works for array-like input returning a boolean array.
"""
- pass
+ return value is None or (isinstance(value, float) and np.isnan(value)) or pd.isna(value)
def from_shapely(data, crs=None):
@@ -77,14 +89,30 @@ def from_shapely(data, crs=None):
such as an authority string (eg "EPSG:4326") or a WKT string.
"""
- pass
+ if not isinstance(data, np.ndarray):
+ data = np.array(data, dtype=object)
+
+ if data.ndim != 1:
+ raise ValueError("Only 1-dimensional input is supported")
+
+ # Validate that all elements are shapely geometries or None
+ for geom in data:
+ if geom is not None and not isinstance(geom, BaseGeometry):
+ raise TypeError(f"Invalid geometry object {geom}")
+
+ return GeometryArray(data, crs=crs)
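+
+# Illustrative usage (not part of the patch):
+#     from_shapely([Point(0, 0), None], crs="EPSG:4326")
+# yields a length-2 GeometryArray whose second element is missing.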
def to_shapely(geoms):
"""
Convert GeometryArray to numpy object array of shapely objects.
"""
- pass
+ if isinstance(geoms, GeometryArray):
+ return geoms._data
+ elif isinstance(geoms, np.ndarray):
+ return geoms
+ else:
+ raise TypeError("Input must be a GeometryArray or numpy array")
def from_wkb(data, crs=None, on_invalid='raise'):
@@ -106,14 +134,48 @@ def from_wkb(data, crs=None, on_invalid='raise'):
- ignore: invalid WKB geometries will be returned as None without a warning.
"""
- pass
+ import shapely.wkb
+
+ if not isinstance(data, np.ndarray):
+ data = np.array(data, dtype=object)
+
+ geoms = []
+ for wkb in data:
+ try:
+ geom = shapely.wkb.loads(wkb)
+ geoms.append(geom)
+ except Exception as e:
+ if on_invalid == 'raise':
+ raise ValueError(f"Invalid WKB geometry: {e}")
+ elif on_invalid == 'warn':
+ warnings.warn(f"Invalid WKB geometry: {e}", UserWarning)
+ geoms.append(None)
+ elif on_invalid == 'ignore':
+ geoms.append(None)
+ else:
+ raise ValueError("Invalid value for on_invalid")
+
+ return GeometryArray(np.array(geoms, dtype=object), crs=crs)
def to_wkb(geoms, hex=False, **kwargs):
"""
Convert GeometryArray to a numpy object array of WKB objects.
"""
- pass
+ import shapely.wkb
+
+ if isinstance(geoms, GeometryArray):
+ geoms = geoms._data
+
+ wkb_objects = []
+ for geom in geoms:
+ if geom is None:
+ wkb_objects.append(None)
+ else:
+ wkb = shapely.wkb.dumps(geom, hex=hex, **kwargs)
+ wkb_objects.append(wkb)
+
+ return np.array(wkb_objects, dtype=object)
def from_wkt(data, crs=None, on_invalid='raise'):
diff --git a/geopandas/explore.py b/geopandas/explore.py
index dbf38bd..269566f 100644
--- a/geopandas/explore.py
+++ b/geopandas/explore.py
@@ -227,7 +227,24 @@ def _explore(df, column=None, cmap=None, color=None, m=None, tiles=
def _tooltip_popup(type, fields, gdf, **kwds):
"""get tooltip or popup"""
- pass
+ from folium.features import GeoJsonTooltip, GeoJsonPopup
+
+ if isinstance(fields, bool):
+ if fields:
+ fields = gdf.columns.drop(gdf.geometry.name).tolist()
+ else:
+ return None
+ elif isinstance(fields, int):
+ fields = gdf.columns.drop(gdf.geometry.name).tolist()[:fields]
+ elif isinstance(fields, str):
+ fields = [fields]
+
+ if type == 'tooltip':
+ return GeoJsonTooltip(fields=fields, **kwds)
+ elif type == 'popup':
+ return GeoJsonPopup(fields=fields, **kwds)
+ else:
+ raise ValueError("Type must be either 'tooltip' or 'popup'")
def _categorical_legend(m, title, categories, colors):
@@ -251,7 +268,52 @@ def _categorical_legend(m, title, categories, colors):
colors : list-like
list of colors (in the same order as categories)
"""
- pass
+ from branca.element import Template, MacroElement
+
+ template = """
+ {% macro html(this, kwargs) %}
+ <div style="
+ position: fixed;
+ bottom: 50px;
+ right: 50px;
+ width: 120px;
+ height: 90px;
+ z-index:9999;
+ font-size:14px;
+ ">
+ <p><a style="color: #0000ff; text-decoration: none; font-weight: bold; font-size: 14px;" href="#" onclick="toggle_legend(); return false;">Toggle Legend</a></p>
+ <div id="legend" style="background-color: #fff; padding: 10px; display:none;">
+ <div style='padding:3px; font-weight: bold;'>
+ {{ this.title }}
+ </div>
+ {% for label, color in this.categories_colors %}
+ <div>
+ <span style='background-color: {{ color }}; border: 1px solid #000; display: inline-block; width: 12px; height: 12px;'></span>
+ <span style='padding-left:5px;'>{{ label }}</span>
+ </div>
+ {% endfor %}
+ </div>
+ </div>
+
+ <script>
+ function toggle_legend() {
+ var legend = document.getElementById('legend');
+ if (legend.style.display === 'none') {
+ legend.style.display = 'block';
+ } else {
+ legend.style.display = 'none';
+ }
+ }
+ </script>
+ {% endmacro %}
+ """
+
+ macro = MacroElement()
+ macro._template = Template(template)
+ macro.title = title
+ macro.categories_colors = list(zip(categories, colors))
+
+ m.get_root().add_child(macro)
def _explore_geoseries(s, color=None, m=None, tiles='OpenStreetMap', attr=
diff --git a/geopandas/geoseries.py b/geopandas/geoseries.py
index 38c4ecf..30c4981 100644
--- a/geopandas/geoseries.py
+++ b/geopandas/geoseries.py
@@ -193,7 +193,7 @@ class GeoSeries(GeoPandasBase, Series):
GeoSeries.z
"""
- pass
+        return Series([geom.x if geom is not None else None for geom in self], index=self.index)
@property
def y(self) ->Series:
@@ -221,7 +221,7 @@ class GeoSeries(GeoPandasBase, Series):
GeoSeries.z
"""
- pass
+        return Series([geom.y if geom is not None else None for geom in self], index=self.index)
@property
def z(self) ->Series:
@@ -249,7 +249,7 @@ class GeoSeries(GeoPandasBase, Series):
GeoSeries.y
"""
- pass
+        return Series([geom.z if geom is not None and geom.has_z else None for geom in self], index=self.index)
@classmethod
def from_file(cls, filename: (os.PathLike | typing.IO), **kwargs
@@ -602,7 +602,7 @@ class GeoSeries(GeoPandasBase, Series):
GeoSeries.notna : inverse of isna
GeoSeries.is_empty : detect empty geometries
"""
- pass
+ return Series([geom is None for geom in self], index=self.index)
def isnull(self) ->Series:
"""Alias for `isna` method. See `isna` for more detail."""
@@ -647,7 +647,7 @@ class GeoSeries(GeoPandasBase, Series):
GeoSeries.isna : inverse of notna
GeoSeries.is_empty : detect empty geometries
"""
- pass
+ return Series([geom is not None for geom in self], index=self.index)
def notnull(self) ->Series:
"""Alias for `notna` method. See `notna` for more detail."""
diff --git a/geopandas/io/_geoarrow.py b/geopandas/io/_geoarrow.py
index cb4401f..b8eb457 100644
--- a/geopandas/io/_geoarrow.py
+++ b/geopandas/io/_geoarrow.py
@@ -101,7 +101,52 @@ def geopandas_to_arrow(df, index=None, geometry_encoding='WKB', interleaved
specify the keyword).
"""
- pass
+ if not isinstance(df, GeoDataFrame):
+ raise ValueError("Input must be a GeoDataFrame")
+
+ # Handle index
+ if index is None:
+ index = not isinstance(df.index, pd.RangeIndex)
+
+ # Convert DataFrame to Arrow table
+ table = pa.Table.from_pandas(df, preserve_index=index)
+
+ # Handle geometry column
+ geom_col = df.geometry.name
+ geom_array = df.geometry.values
+
+ if geometry_encoding == 'WKB':
+ wkb_array = pa.array(geom_array.to_wkb())
+ field = pa.field(geom_col, pa.binary())
+ table = table.set_column(table.schema.get_field_index(geom_col), field, wkb_array)
+ elif geometry_encoding == 'geoarrow':
+ if include_z is None:
+ include_z = geom_array.has_z.any()
+
+ coord_type = pa.float64()
+ if interleaved:
+ coords = [g.coords[:] for g in geom_array]
+ if include_z:
+ coords_array = pa.list_(pa.list_(coord_type, 3))
+ else:
+ coords_array = pa.list_(pa.list_(coord_type, 2))
+ coords_array = pa.array(coords, type=coords_array)
+ else:
+ x, y = zip(*[(c[0], c[1]) for g in geom_array for c in g.coords])
+ if include_z:
+ z = [c[2] if len(c) > 2 else float('nan') for g in geom_array for c in g.coords]
+ coords_array = pa.StructArray.from_arrays([pa.array(x), pa.array(y), pa.array(z)], ['x', 'y', 'z'])
+ else:
+ coords_array = pa.StructArray.from_arrays([pa.array(x), pa.array(y)], ['x', 'y'])
+
+ geom_type = pa.array([g.geom_type for g in geom_array], pa.string())
+ field = pa.field(geom_col, pa.struct([('type', pa.string()), ('coordinates', coords_array.type)]))
+ geoarrow_array = pa.StructArray.from_arrays([geom_type, coords_array], ['type', 'coordinates'])
+ table = table.set_column(table.schema.get_field_index(geom_col), field, geoarrow_array)
+ else:
+ raise ValueError("Invalid geometry_encoding. Must be 'WKB' or 'geoarrow'")
+
+ return table
def arrow_to_geopandas(table, geometry=None):
@@ -121,7 +166,55 @@ def arrow_to_geopandas(table, geometry=None):
GeoDataFrame
"""
- pass
+ if not isinstance(table, pa.Table):
+ raise ValueError("Input must be a pyarrow.Table")
+
+ # Convert Arrow table to pandas DataFrame
+ df = table.to_pandas()
+
+ # Find geometry column
+ if geometry is None:
+ geometry_columns = [field.name for field in table.schema if
+ isinstance(field.type, pa.BinaryType) or
+ (isinstance(field.type, pa.StructType) and 'type' in field.type.names and 'coordinates' in field.type.names)]
+ if not geometry_columns:
+ raise ValueError("No geometry column found in the Arrow table")
+ geometry = geometry_columns[0]
+ elif geometry not in table.column_names:
+ raise ValueError(f"Specified geometry column '{geometry}' not found in the Arrow table")
+
+ # Convert geometry column
+ if isinstance(table.field(geometry).type, pa.BinaryType):
+ # WKB encoding
+ df[geometry] = from_wkb(df[geometry])
+ elif isinstance(table.field(geometry).type, pa.StructType):
+ # GeoArrow encoding
+ geom_array = table[geometry]
+ geom_type = geom_array.field('type').to_pylist()
+ coords = geom_array.field('coordinates').to_pylist()
+
+ geometries = []
+ for gtype, coord in zip(geom_type, coords):
+ if gtype == 'Point':
+ geometries.append(shapely.Point(coord[0]))
+ elif gtype == 'LineString':
+ geometries.append(shapely.LineString(coord))
+ elif gtype == 'Polygon':
+ geometries.append(shapely.Polygon(coord[0], coord[1:]))
+ elif gtype == 'MultiPoint':
+ geometries.append(shapely.MultiPoint(coord))
+ elif gtype == 'MultiLineString':
+ geometries.append(shapely.MultiLineString(coord))
+ elif gtype == 'MultiPolygon':
+ geometries.append(shapely.MultiPolygon([shapely.Polygon(p[0], p[1:]) for p in coord]))
+ else:
+ raise ValueError(f"Unsupported geometry type: {gtype}")
+
+ df[geometry] = from_shapely(geometries)
+ else:
+ raise ValueError(f"Unsupported geometry encoding for column '{geometry}'")
+
+ return GeoDataFrame(df, geometry=geometry)
def arrow_to_geometry_array(arr):
@@ -131,7 +224,34 @@ def arrow_to_geometry_array(arr):
Specifically for GeoSeries.from_arrow.
"""
- pass
+ if isinstance(arr, pa.BinaryArray):
+ # WKB encoding
+ return from_wkb(arr.to_pylist())
+ elif isinstance(arr, pa.StructArray):
+ # GeoArrow encoding
+ geom_type = arr.field('type').to_pylist()
+ coords = arr.field('coordinates').to_pylist()
+
+ geometries = []
+ for gtype, coord in zip(geom_type, coords):
+ if gtype == 'Point':
+ geometries.append(shapely.Point(coord[0]))
+ elif gtype == 'LineString':
+ geometries.append(shapely.LineString(coord))
+ elif gtype == 'Polygon':
+ geometries.append(shapely.Polygon(coord[0], coord[1:]))
+ elif gtype == 'MultiPoint':
+ geometries.append(shapely.MultiPoint(coord))
+ elif gtype == 'MultiLineString':
+ geometries.append(shapely.MultiLineString(coord))
+ elif gtype == 'MultiPolygon':
+ geometries.append(shapely.MultiPolygon([shapely.Polygon(p[0], p[1:]) for p in coord]))
+ else:
+ raise ValueError(f"Unsupported geometry type: {gtype}")
+
+ return from_shapely(geometries)
+ else:
+ raise ValueError("Unsupported Arrow array type for geometry conversion")
def construct_shapely_array(arr: pa.Array, extension_name: str):
@@ -140,4 +260,28 @@ def construct_shapely_array(arr: pa.Array, extension_name: str):
with GeoArrow extension type.
"""
- pass
+ if not isinstance(arr, pa.Array):
+ raise ValueError("Input must be a pyarrow.Array")
+
+ if extension_name not in GEOARROW_ENCODINGS:
+ raise ValueError(f"Unsupported GeoArrow encoding: {extension_name}")
+
+ geom_type = GeometryType[extension_name.upper()]
+ coords = arr.field('coordinates').to_pylist()
+
+ geometries = []
+ for coord in coords:
+ if geom_type == GeometryType.POINT:
+ geometries.append(shapely.Point(coord))
+ elif geom_type == GeometryType.LINESTRING:
+ geometries.append(shapely.LineString(coord))
+ elif geom_type == GeometryType.POLYGON:
+ geometries.append(shapely.Polygon(coord[0], coord[1:]))
+ elif geom_type == GeometryType.MULTIPOINT:
+ geometries.append(shapely.MultiPoint(coord))
+ elif geom_type == GeometryType.MULTILINESTRING:
+ geometries.append(shapely.MultiLineString(coord))
+ elif geom_type == GeometryType.MULTIPOLYGON:
+ geometries.append(shapely.MultiPolygon([shapely.Polygon(p[0], p[1:]) for p in coord]))
+
+ return np.array(geometries, dtype=object)
diff --git a/geopandas/io/arrow.py b/geopandas/io/arrow.py
index defcba9..52f9590 100644
--- a/geopandas/io/arrow.py
+++ b/geopandas/io/arrow.py
@@ -26,7 +26,22 @@ def _remove_id_from_member_of_ensembles(json_dict):
Mimicking the patch to GDAL from https://github.com/OSGeo/gdal/pull/5872
"""
- pass
+ if isinstance(json_dict, dict):
+ if "datum" in json_dict:
+ datum = json_dict["datum"]
+ if isinstance(datum, dict) and "ensemble" in datum:
+ ensemble = datum["ensemble"]
+ if isinstance(ensemble, dict) and "members" in ensemble:
+ members = ensemble["members"]
+ if isinstance(members, list):
+ for member in members:
+ if isinstance(member, dict):
+ member.pop("id", None)
+ for value in json_dict.values():
+ _remove_id_from_member_of_ensembles(value)
+ elif isinstance(json_dict, list):
+ for item in json_dict:
+ _remove_id_from_member_of_ensembles(item)
_geometry_type_names = ['Point', 'LineString', 'LineString', 'Polygon',
@@ -39,7 +54,7 @@ def _get_geometry_types(series):
"""
Get unique geometry types from a GeoSeries.
"""
- pass
+ return list(series.geom_type.unique())
def _create_metadata(df, schema_version=None, geometry_encoding=None,
@@ -61,7 +76,40 @@ def _create_metadata(df, schema_version=None, geometry_encoding=None,
-------
dict
"""
- pass
+ if schema_version is None:
+ schema_version = SUPPORTED_VERSIONS[-1]
+
+ if schema_version not in SUPPORTED_VERSIONS:
+ raise ValueError(f"Unsupported schema version: {schema_version}")
+
+ geometry_columns = df.select_dtypes(include=['geometry']).columns
+ if len(geometry_columns) == 0:
+ raise ValueError("No geometry column found in GeoDataFrame")
+
+ primary_geometry = df.geometry.name
+
+ metadata = {
+ "version": schema_version,
+ "primary_column": primary_geometry,
+ "columns": {}
+ }
+
+ for col in geometry_columns:
+ col_metadata = {
+ "encoding": geometry_encoding or "WKB",
+ "geometry_types": _get_geometry_types(df[col])
+ }
+
+ if df[col].crs:
+ col_metadata["crs"] = df[col].crs.to_wkt()
+
+ if write_covering_bbox:
+ bounds = df[col].total_bounds
+ col_metadata["bbox"] = [bounds[0], bounds[1], bounds[2], bounds[3]]
+
+ metadata["columns"][col] = col_metadata
+
+ return metadata
def _encode_metadata(metadata):
diff --git a/geopandas/io/file.py b/geopandas/io/file.py
index 43101f2..7438f90 100644
--- a/geopandas/io/file.py
+++ b/geopandas/io/file.py
@@ -36,12 +36,15 @@ _EXTENSION_TO_DRIVER = {'.bna': 'BNA', '.dxf': 'DXF', '.csv': 'CSV', '.shp':
def _expand_user(path):
"""Expand paths that use ~."""
- pass
+ return os.path.expanduser(path)
def _is_url(url):
"""Check to see if *url* has a valid protocol."""
- pass
+ try:
+ return parse_url(url).scheme in _VALID_URLS
+ except Exception:
+ return False
def _read_file(filename, bbox=None, mask=None, columns=None, rows=None,
@@ -88,27 +91,6 @@ def _read_file(filename, bbox=None, mask=None, columns=None, rows=None,
arguments are passed to fiona.open`. For more information on possible
keywords, type: ``import pyogrio; help(pyogrio.write_dataframe)``.
-
- Examples
- --------
- >>> df = geopandas.read_file("nybb.shp") # doctest: +SKIP
-
- Specifying layer of GPKG:
-
- >>> df = geopandas.read_file("file.gpkg", layer='cities') # doctest: +SKIP
-
- Reading only first 10 rows:
-
- >>> df = geopandas.read_file("nybb.shp", rows=10) # doctest: +SKIP
-
- Reading only geometries intersecting ``mask``:
-
- >>> df = geopandas.read_file("nybb.shp", mask=polygon) # doctest: +SKIP
-
- Reading only geometries intersecting ``bbox``:
-
- >>> df = geopandas.read_file("nybb.shp", bbox=(0, 0, 10, 20)) # doctest: +SKIP
-
Returns
-------
:obj:`geopandas.GeoDataFrame` or :obj:`pandas.DataFrame` :
@@ -131,14 +113,47 @@ def _read_file(filename, bbox=None, mask=None, columns=None, rows=None,
(https://gdal.org/user/virtual_file_systems.html#vsicurl-http-https-ftp-files-random-access).
"""
- pass
+ if engine is None:
+ engine = "pyogrio" if pyogrio is not None else "fiona"
+
+ if engine == "pyogrio":
+ if pyogrio is None:
+ raise ImportError("pyogrio is required to use the pyogrio engine")
+ return pyogrio.read_dataframe(filename, bbox=bbox, mask=mask, columns=columns, rows=rows, **kwargs)
+ elif engine == "fiona":
+ if fiona is None:
+ raise ImportError("fiona is required to use the fiona engine")
+ with fiona.open(filename, **kwargs) as source:
+ crs = source.crs
+ driver = source.driver
+ if columns is None:
+ columns = list(source.schema['properties'].keys())
+ if bbox is not None:
+ source = source.filter(bbox=bbox)
+ if mask is not None:
+ source = source.filter(mask=mask)
+ if rows is not None:
+ if isinstance(rows, int):
+ source = list(source)[:rows]
+ elif isinstance(rows, slice):
+ source = list(source)[rows]
+ else:
+ raise ValueError("rows must be an integer or a slice object")
+ gdf = GeoDataFrame.from_features(source, crs=crs, columns=columns)
+ gdf.crs = crs
+ return gdf
+ else:
+ raise ValueError("engine must be either 'pyogrio' or 'fiona'")
def _detect_driver(path):
"""
Attempt to auto-detect driver based on the extension
"""
- pass
+ try:
+ return _EXTENSION_TO_DRIVER[os.path.splitext(path)[1].lower()]
+ except KeyError:
+ return None
def _to_file(df, filename, driver=None, schema=None, index=None, mode='w',
@@ -214,14 +229,40 @@ def _to_file(df, filename, driver=None, schema=None, index=None, mode='w',
may fail. In this case, the proper encoding can be specified explicitly
by using the encoding keyword parameter, e.g. ``encoding='utf-8'``.
"""
- pass
+ if engine is None:
+ engine = "pyogrio" if pyogrio is not None else "fiona"
+
+ if driver is None:
+ driver = _detect_driver(filename)
+
+ if engine == "pyogrio":
+ if pyogrio is None:
+ raise ImportError("pyogrio is required to use the pyogrio engine")
+        # pyogrio takes the CRS from df itself; its write API models `mode`
+        # as an `append` flag rather than a mode string
+        pyogrio.write_dataframe(df, filename, driver=driver,
+                                append=(mode == "a"), metadata=metadata, **kwargs)
+ elif engine == "fiona":
+ if fiona is None:
+ raise ImportError("fiona is required to use the fiona engine")
+ if schema is None:
+ schema = _geometry_types(df)
+ with fiona.open(filename, mode, driver=driver, crs=crs, schema=schema, **kwargs) as colxn:
+ colxn.writerecords(df.iterfeatures())
+ if metadata:
+            colxn.update_tags(metadata)
+ else:
+ raise ValueError("engine must be either 'pyogrio' or 'fiona'")
def _geometry_types(df):
"""
Determine the geometry types in the GeoDataFrame for the schema.
"""
- pass
+ geom_types = set(df.geometry.geom_type)
+ if len(geom_types) == 1:
+ return list(geom_types)[0]
+ elif len(geom_types) > 1:
+ return "GeometryCollection"
+ else:
+ return None
def _list_layers(filename) ->pd.DataFrame:
@@ -245,4 +286,11 @@ def _list_layers(filename) ->pd.DataFrame:
pandas.DataFrame
A DataFrame with columns "name" and "geometry_type" and one row per layer.
"""
- pass
+ if pyogrio is not None:
+ return pyogrio.list_layers(filename)
+    elif fiona is not None:
+        # fiona lists layers via fiona.listlayers, not a Collection attribute
+        layers = []
+        for name in fiona.listlayers(filename):
+            with fiona.open(filename, layer=name) as src:
+                layers.append({"name": name, "geometry_type": src.schema["geometry"]})
+        return pd.DataFrame(layers)
+ else:
+ raise ImportError("Either pyogrio or fiona is required to list layers")
diff --git a/geopandas/io/sql.py b/geopandas/io/sql.py
index 1255461..b57d5a0 100644
--- a/geopandas/io/sql.py
+++ b/geopandas/io/sql.py
@@ -24,7 +24,14 @@ def _get_conn(conn_or_engine):
-------
Connection
"""
- pass
+    from sqlalchemy.engine import Connection, Engine
+
+    if isinstance(conn_or_engine, Engine):
+        # It's an Engine: open a transaction-scoped connection
+        with conn_or_engine.begin() as conn:
+            yield conn
+    elif isinstance(conn_or_engine, Connection):
+        # It's already a Connection (which also has .begin, so a hasattr
+        # check cannot tell the two apart): reuse it within a transaction
+        with conn_or_engine.begin():
+            yield conn_or_engine
+    else:
+        raise ValueError(f"Unknown Connectable: {conn_or_engine}")
def _df_to_geodf(df, geom_col='geom', crs=None, con=None):
@@ -50,7 +57,22 @@ def _df_to_geodf(df, geom_col='geom', crs=None, con=None):
-------
GeoDataFrame
"""
- pass
+ if geom_col not in df:
+ raise ValueError(f"Column {geom_col} not found in DataFrame")
+
+    # geometries may arrive as hex strings or as raw WKB bytes (e.g. from
+    # ST_AsBinary); handle both and preserve missing values
+    df[geom_col] = df[geom_col].apply(
+        lambda x: shapely.wkb.loads(x, hex=isinstance(x, str)) if x is not None else None
+    )
+
+ gdf = GeoDataFrame(df, geometry=geom_col, crs=crs)
+
+    if crs is None and con is not None and getattr(df, "name", None):
+        # Best-effort SRID lookup, only possible when the frame carries the
+        # source table name in df.name
+        with _get_conn(con) as conn:
+            query = f"SELECT ST_SRID({geom_col}) FROM {df.name} AS t LIMIT 1"
+            srid = conn.execute(query).scalar()
+            if srid:
+                gdf.crs = f"EPSG:{srid}"
+
+ return gdf
def _read_postgis(sql, con, geom_col='geom', crs=None, index_col=None,
@@ -102,7 +124,25 @@ def _read_postgis(sql, con, geom_col='geom', crs=None, index_col=None,
>>> sql = "SELECT ST_AsBinary(geom) AS geom, highway FROM roads"
>>> df = geopandas.read_postgis(sql, con) # doctest: +SKIP
"""
- pass
+ if not isinstance(sql, str):
+ raise ValueError("sql must be a string")
+
+ with _get_conn(con) as conn:
+ if chunksize is not None:
+ df_iter = pd.read_sql(
+ sql, conn, index_col=index_col, coerce_float=coerce_float,
+ params=params, parse_dates=parse_dates, chunksize=chunksize
+ )
+ return (
+ _df_to_geodf(df, geom_col, crs, conn)
+ for df in df_iter
+ )
+ else:
+ df = pd.read_sql(
+ sql, conn, index_col=index_col, coerce_float=coerce_float,
+ params=params, parse_dates=parse_dates
+ )
+ return _df_to_geodf(df, geom_col, crs, conn)
def _get_geometry_type(gdf):
@@ -124,19 +164,37 @@ def _get_geometry_type(gdf):
- if any of the geometries has Z-coordinate, all records will
be written with 3D.
"""
- pass
+ geom_types = set(gdf.geometry.type)
+
+ if len(geom_types) == 1:
+ geom_type = geom_types.pop()
+ if geom_type == 'LinearRing':
+ return 'LineString'
+ return geom_type
+
+ if geom_types.issubset({'Polygon', 'MultiPolygon'}):
+ return 'Polygon'
+ if geom_types.issubset({'Point', 'LineString'}):
+ return 'Geometry'
+
+ return 'Geometry'
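+
+# Example: a frame mixing Polygon and MultiPolygon rows maps to 'Polygon'
+# above, while any other mixture falls back to the generic 'Geometry'.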
def _get_srid_from_crs(gdf):
"""
Get EPSG code from CRS if available. If not, return 0.
"""
- pass
+ if gdf.crs is None:
+ return 0
+ try:
+ return gdf.crs.to_epsg() or 0
+    except Exception:
+ return 0
def _convert_to_ewkb(gdf, geom_name, srid):
"""Convert geometries to ewkb."""
- pass
+ return gdf[geom_name].apply(lambda geom: shapely.wkb.dumps(geom, hex=True, srid=srid))
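+
+# Example (shapely.wkb.dumps supports hex and srid): Point(1, 2) with
+# srid=4326 serializes to an EWKB hex string whose header carries the
+# SRID, which is what PostGIS expects on insert.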
def _write_postgis(gdf, name, con, schema=None, if_exists='fail', index=
@@ -183,4 +241,49 @@ def _write_postgis(gdf, name, con, schema=None, if_exists='fail', index=
>>> engine = create_engine("postgresql://myusername:mypassword@myhost:5432/mydatabase";) # doctest: +SKIP
>>> gdf.to_postgis("my_table", engine) # doctest: +SKIP
"""
- pass
+ from sqlalchemy.types import VARCHAR, FLOAT, INTEGER, BOOLEAN, DATE, DATETIME
+
+    # pandas exposes no public connectable check, so duck-type the input
+    if not hasattr(con, "connect") and not hasattr(con, "execute"):
+        raise ValueError("The connection must be a SQLAlchemy connectable.")
+
+ # Get geometry column name
+ geom_col = gdf.geometry.name
+
+ # Get geometry type
+ geom_type = _get_geometry_type(gdf)
+
+ # Get SRID
+ srid = _get_srid_from_crs(gdf)
+
+ # Convert geometries to EWKB
+ gdf = gdf.copy()
+ gdf[geom_col] = _convert_to_ewkb(gdf, geom_col, srid)
+
+    # Prepare column types (a distinct loop variable keeps the `dtype`
+    # mapping from being shadowed by each column's pandas dtype)
+    if dtype is None:
+        dtype = {}
+    for column, col_dtype in gdf.dtypes.items():
+        if column == geom_col or column in dtype:
+            continue
+        if col_dtype == 'object':
+            dtype[column] = VARCHAR
+        elif col_dtype == 'float64':
+            dtype[column] = FLOAT
+        elif col_dtype == 'int64':
+            dtype[column] = INTEGER
+        elif col_dtype == 'bool':
+            dtype[column] = BOOLEAN
+        elif col_dtype == 'datetime64[ns]':
+            dtype[column] = DATETIME
+        elif col_dtype == 'date':
+            dtype[column] = DATE
+
+ # Add geometry column type
+ from geoalchemy2 import Geometry
+ dtype[geom_col] = Geometry(geometry_type=geom_type, srid=srid)
+
+ # Write to PostGIS
+ with _get_conn(con) as connection:
+ gdf.to_sql(name, connection, schema=schema, if_exists=if_exists,
+ index=index, index_label=index_label, chunksize=chunksize,
+ dtype=dtype)
diff --git a/geopandas/io/tests/generate_legacy_storage_files.py b/geopandas/io/tests/generate_legacy_storage_files.py
index 9b4e042..4fcced4 100644
--- a/geopandas/io/tests/generate_legacy_storage_files.py
+++ b/geopandas/io/tests/generate_legacy_storage_files.py
@@ -29,8 +29,43 @@ import geopandas
def create_pickle_data():
"""create the pickle data"""
- pass
+ # Create a simple GeoDataFrame
+ df = pd.DataFrame({
+ 'name': ['Point A', 'Point B', 'Point C'],
+ 'value': [1, 2, 3]
+ })
+ geometry = [Point(0, 0), Point(1, 1), Point(2, 2)]
+ gdf = geopandas.GeoDataFrame(df, geometry=geometry)
+ return gdf
+def main():
+ if len(sys.argv) != 3:
+ print("Usage: python generate_legacy_storage_files.py <output_dir> <storage_format>")
+ sys.exit(1)
+
+ output_dir = sys.argv[1]
+ storage_format = sys.argv[2]
+
+ if storage_format != 'pickle':
+ print("Only 'pickle' storage format is supported.")
+ sys.exit(1)
+
+ if not os.path.exists(output_dir):
+ os.makedirs(output_dir)
+
+ data = create_pickle_data()
+
+ # Generate filename based on Python version and platform
+ py_version = f"{sys.version_info.major}.{sys.version_info.minor}.{sys.version_info.micro}"
+ platform_name = platform.platform().lower()
+ filename = f"gdf-{py_version}-{platform_name}.pickle"
+
+ filepath = os.path.join(output_dir, filename)
+
+ with open(filepath, 'wb') as f:
+ pickle.dump(data, f, protocol=pickle.HIGHEST_PROTOCOL)
+
+ print(f"Pickle file created: {filepath}")
if __name__ == '__main__':
main()
diff --git a/geopandas/io/tests/test_arrow.py b/geopandas/io/tests/test_arrow.py
index a5dbeb1..fbaf7a9 100644
--- a/geopandas/io/tests/test_arrow.py
+++ b/geopandas/io/tests/test_arrow.py
@@ -31,19 +31,55 @@ def test_roundtrip(tmpdir, file_format, test_dataset, request):
"""Writing to parquet should not raise errors, and should not alter original
GeoDataFrame
"""
- pass
+ gdf = request.getfixturevalue(test_dataset)
+ tmp_file = str(tmpdir.join(f"test.{file_format}"))
+
+ if file_format == "parquet":
+ gdf.to_parquet(tmp_file)
+ result = read_parquet(tmp_file)
+ elif file_format == "feather":
+ gdf.to_feather(tmp_file)
+ result = read_feather(tmp_file)
+
+ assert_geodataframe_equal(gdf, result)
def test_index(tmpdir, file_format, naturalearth_lowres):
"""Setting index=`True` should preserve index in output, and
setting index=`False` should drop index from output.
"""
- pass
+ gdf = naturalearth_lowres.set_index('name')
+ tmp_file = str(tmpdir.join(f"test.{file_format}"))
+
+ if file_format == "parquet":
+ gdf.to_parquet(tmp_file, index=True)
+ result_with_index = read_parquet(tmp_file)
+ gdf.to_parquet(tmp_file, index=False)
+ result_without_index = read_parquet(tmp_file)
+ elif file_format == "feather":
+ gdf.to_feather(tmp_file, index=True)
+ result_with_index = read_feather(tmp_file)
+ gdf.to_feather(tmp_file, index=False)
+ result_without_index = read_feather(tmp_file)
+
+ assert_geodataframe_equal(gdf, result_with_index)
+ assert result_without_index.index.name is None
+ assert_geodataframe_equal(gdf.reset_index(drop=True), result_without_index)
def test_column_order(tmpdir, file_format, naturalearth_lowres):
"""The order of columns should be preserved in the output."""
- pass
+ gdf = naturalearth_lowres[['name', 'pop_est', 'continent', 'geometry']]
+ tmp_file = str(tmpdir.join(f"test.{file_format}"))
+
+ if file_format == "parquet":
+ gdf.to_parquet(tmp_file)
+ result = read_parquet(tmp_file)
+ elif file_format == "feather":
+ gdf.to_feather(tmp_file)
+ result = read_feather(tmp_file)
+
+ assert list(gdf.columns) == list(result.columns)
@pytest.mark.parametrize('compression', ['snappy', 'gzip', 'brotli', None])
@@ -51,7 +87,10 @@ def test_parquet_compression(compression, tmpdir, naturalearth_lowres):
"""Using compression options should not raise errors, and should
return identical GeoDataFrame.
"""
- pass
+ tmp_file = str(tmpdir.join("test.parquet"))
+ naturalearth_lowres.to_parquet(tmp_file, compression=compression)
+ result = read_parquet(tmp_file)
+ assert_geodataframe_equal(naturalearth_lowres, result)
@pytest.mark.skipif(Version(pyarrow.__version__) < Version('0.17.0'),
@@ -61,21 +100,43 @@ def test_feather_compression(compression, tmpdir, naturalearth_lowres):
"""Using compression options should not raise errors, and should
return identical GeoDataFrame.
"""
- pass
+ tmp_file = str(tmpdir.join("test.feather"))
+ naturalearth_lowres.to_feather(tmp_file, compression=compression)
+ result = read_feather(tmp_file)
+ assert_geodataframe_equal(naturalearth_lowres, result)
def test_parquet_multiple_geom_cols(tmpdir, file_format, naturalearth_lowres):
"""If multiple geometry columns are present when written to parquet,
they should all be returned as such when read from parquet.
"""
- pass
+ gdf = naturalearth_lowres.copy()
+ gdf['geometry2'] = gdf.geometry.centroid
+ tmp_file = str(tmpdir.join(f"test.{file_format}"))
+
+ if file_format == "parquet":
+ gdf.to_parquet(tmp_file)
+ result = read_parquet(tmp_file)
+ elif file_format == "feather":
+ gdf.to_feather(tmp_file)
+ result = read_feather(tmp_file)
+
+ assert isinstance(result, GeoDataFrame)
+ assert isinstance(result['geometry'], geopandas.GeoSeries)
+ assert isinstance(result['geometry2'], geopandas.GeoSeries)
+ assert_geodataframe_equal(gdf, result)
def test_parquet_missing_metadata(tmpdir, naturalearth_lowres):
"""Missing geo metadata, such as from a parquet file created
from a pandas DataFrame, will raise a ValueError.
"""
- pass
+ df = DataFrame(naturalearth_lowres.drop(columns=['geometry']))
+ tmp_file = str(tmpdir.join("test.parquet"))
+ df.to_parquet(tmp_file)
+
+ with pytest.raises(ValueError, match="Missing geo metadata in Parquet file."):
+ read_parquet(tmp_file)
def test_parquet_missing_metadata2(tmpdir):
@@ -83,7 +144,15 @@ def test_parquet_missing_metadata2(tmpdir):
from a pyarrow Table (which will also not contain pandas metadata),
will raise a ValueError.
"""
- pass
+ table = pyarrow.Table.from_arrays(
+ [pyarrow.array([1, 2, 3]), pyarrow.array(['a', 'b', 'c'])],
+ names=['col1', 'col2']
+ )
+ tmp_file = str(tmpdir.join("test.parquet"))
+ pq.write_table(table, tmp_file)
+
+ with pytest.raises(ValueError, match="Missing geo metadata in Parquet file."):
+ read_parquet(tmp_file)
@pytest.mark.parametrize('geo_meta,error', [({'geo': b''},
@@ -99,34 +168,87 @@ def test_parquet_invalid_metadata(tmpdir, geo_meta, error, naturalearth_lowres
This requires writing the parquet file directly below, so that we can
control the metadata that is written for this test.
"""
- pass
+    tmp_file = str(tmpdir.join("test.parquet"))
+    table = pyarrow.Table.from_pandas(naturalearth_lowres)
+
+    # Attach the invalid geo metadata to the schema before writing
+    # (pq.write_table has no metadata keyword)
+    metadata = table.schema.metadata or {}
+    metadata.update(geo_meta)
+    table = table.replace_schema_metadata(metadata)
+    pq.write_table(table, tmp_file)
+
+ with pytest.raises(ValueError, match=error):
+ read_parquet(tmp_file)
def test_subset_columns(tmpdir, file_format, naturalearth_lowres):
"""Reading a subset of columns should correctly decode selected geometry
columns.
"""
- pass
+ gdf = naturalearth_lowres
+ tmp_file = str(tmpdir.join(f"test.{file_format}"))
+
+ if file_format == "parquet":
+ gdf.to_parquet(tmp_file)
+ result = read_parquet(tmp_file, columns=['name', 'geometry'])
+ elif file_format == "feather":
+ gdf.to_feather(tmp_file)
+ result = read_feather(tmp_file, columns=['name', 'geometry'])
+
+ expected = gdf[['name', 'geometry']]
+ assert_geodataframe_equal(expected, result)
def test_promote_secondary_geometry(tmpdir, file_format, naturalearth_lowres):
"""Reading a subset of columns that does not include the primary geometry
column should promote the first geometry column present.
"""
- pass
+ gdf = naturalearth_lowres.copy()
+ gdf['geometry2'] = gdf.geometry.centroid
+ tmp_file = str(tmpdir.join(f"test.{file_format}"))
+
+ if file_format == "parquet":
+ gdf.to_parquet(tmp_file)
+ result = read_parquet(tmp_file, columns=['name', 'geometry2'])
+ elif file_format == "feather":
+ gdf.to_feather(tmp_file)
+ result = read_feather(tmp_file, columns=['name', 'geometry2'])
+
+ assert isinstance(result, GeoDataFrame)
+ assert result.geometry.name == 'geometry2'
+ assert_geoseries_equal(gdf['geometry2'], result.geometry)
def test_columns_no_geometry(tmpdir, file_format, naturalearth_lowres):
"""Reading a parquet file that is missing all of the geometry columns
should raise a ValueError"""
- pass
+ gdf = naturalearth_lowres
+ tmp_file = str(tmpdir.join(f"test.{file_format}"))
+
+ if file_format == "parquet":
+ gdf.to_parquet(tmp_file)
+ with pytest.raises(ValueError, match="No geometry columns found"):
+ read_parquet(tmp_file, columns=['name', 'pop_est'])
+ elif file_format == "feather":
+ gdf.to_feather(tmp_file)
+ with pytest.raises(ValueError, match="No geometry columns found"):
+ read_feather(tmp_file, columns=['name', 'pop_est'])
def test_missing_crs(tmpdir, file_format, naturalearth_lowres):
"""If CRS is `None`, it should be properly handled
and remain `None` when read from parquet`.
"""
- pass
+ gdf = naturalearth_lowres.copy()
+ gdf.crs = None
+ tmp_file = str(tmpdir.join(f"test.{file_format}"))
+
+ if file_format == "parquet":
+ gdf.to_parquet(tmp_file)
+ result = read_parquet(tmp_file)
+ elif file_format == "feather":
+ gdf.to_feather(tmp_file)
+ result = read_feather(tmp_file)
+
+ assert result.crs is None
+ assert_geodataframe_equal(gdf, result)
@pytest.mark.parametrize('version', ['0.1.0', '0.4.0', '1.0.0-beta.1'])
@@ -145,7 +267,17 @@ def test_read_versioned_file(version):
df.to_feather(DATA_PATH / 'arrow' / f'test_data_v{METADATA_VERSION}.feather')
df.to_parquet(DATA_PATH / 'arrow' / f'test_data_v{METADATA_VERSION}.parquet')
"""
- pass
+ feather_file = DATA_PATH / 'arrow' / f'test_data_v{version}.feather'
+ parquet_file = DATA_PATH / 'arrow' / f'test_data_v{version}.parquet'
+
+ gdf_feather = read_feather(feather_file)
+ gdf_parquet = read_parquet(parquet_file)
+
+ assert isinstance(gdf_feather, GeoDataFrame)
+ assert isinstance(gdf_parquet, GeoDataFrame)
+ assert gdf_feather.crs == "EPSG:4326"
+ assert gdf_parquet.crs == "EPSG:4326"
+ assert_geodataframe_equal(gdf_feather, gdf_parquet)
def test_read_gdal_files():
diff --git a/geopandas/io/tests/test_sql.py b/geopandas/io/tests/test_sql.py
index 00f0209..d4eba19 100644
--- a/geopandas/io/tests/test_sql.py
+++ b/geopandas/io/tests/test_sql.py
@@ -27,7 +27,12 @@ def check_available_postgis_drivers() ->list[str]:
This prevents tests running if the relevant package isn't installed
(rather than being skipped, as skips are treated as failures during postgis CI)
"""
- pass
+ available_drivers = []
+ if find_spec("psycopg2"):
+ available_drivers.append("psycopg2")
+ if find_spec("psycopg"):
+ available_drivers.append("psycopg")
+ return available_drivers
POSTGIS_DRIVERS = check_available_postgis_drivers()
@@ -35,7 +40,13 @@ POSTGIS_DRIVERS = check_available_postgis_drivers()
def prepare_database_credentials() ->dict:
"""Gather postgres connection credentials from environment variables."""
- pass
+ return {
+ "dbname": os.environ.get("PGDATABASE", "test_geopandas"),
+ "user": os.environ.get("PGUSER", "postgres"),
+ "password": os.environ.get("PGPASSWORD", ""),
+ "host": os.environ.get("PGHOST", "localhost"),
+ "port": os.environ.get("PGPORT", 5432),
+ }
@pytest.fixture()
@@ -43,7 +54,20 @@ def connection_postgis(request):
"""Create a postgres connection using either psycopg2 or psycopg.
Use this as an indirect fixture, where the request parameter is POSTGIS_DRIVERS."""
- pass
+ driver = request.param
+ credentials = prepare_database_credentials()
+
+ if driver == "psycopg2":
+ import psycopg2
+ conn = psycopg2.connect(**credentials)
+ elif driver == "psycopg":
+ import psycopg
+ conn = psycopg.connect(**credentials)
+ else:
+ raise ValueError(f"Unsupported driver: {driver}")
+
+ yield conn
+ conn.close()
@pytest.fixture()
@@ -53,7 +77,20 @@ def engine_postgis(request):
Use this as an indirect fixture, where the request parameter is POSTGIS_DRIVERS.
"""
- pass
+ from sqlalchemy import create_engine
+
+ driver = request.param
+ credentials = prepare_database_credentials()
+
+ if driver == "psycopg2":
+ engine = create_engine(f"postgresql+psycopg2://{credentials['user']}:{credentials['password']}@{credentials['host']}:{credentials['port']}/{credentials['dbname']}")
+ elif driver == "psycopg":
+ engine = create_engine(f"postgresql+psycopg://{credentials['user']}:{credentials['password']}@{credentials['host']}:{credentials['port']}/{credentials['dbname']}")
+ else:
+ raise ValueError(f"Unsupported driver: {driver}")
+
+ yield engine
+ engine.dispose()
@pytest.fixture()
@@ -72,7 +109,18 @@ def connection_spatialite():
``AttributeError`` on missing support for loadable SQLite extensions
``sqlite3.OperationalError`` on missing SpatiaLite
"""
- pass
+ import sqlite3
+
+ try:
+ conn = sqlite3.connect(":memory:")
+ conn.enable_load_extension(True)
+ conn.load_extension("mod_spatialite")
+ except (AttributeError, sqlite3.OperationalError) as e:
+ pytest.skip(f"Unable to load SpatiaLite extension: {str(e)}")
+
+ conn.execute("SELECT InitSpatialMetadata(1)")
+ yield conn
+ conn.close()
class TestIO:
diff --git a/geopandas/io/util.py b/geopandas/io/util.py
index a13ec40..041f4e7 100644
--- a/geopandas/io/util.py
+++ b/geopandas/io/util.py
@@ -9,7 +9,8 @@ def vsi_path(path: str) ->str:
Ensure path is a local path or a GDAL-compatible vsi path.
"""
- pass
+ parsed = _parse_uri(path)
+ return _construct_vsi_path(*parsed)
SCHEMES = {'file': 'file', 'zip': 'zip', 'tar': 'tar', 'gzip': 'gzip',
@@ -33,9 +34,35 @@ def _parse_uri(path: str):
scheme : str
URI scheme such as "https" or "zip+s3".
"""
- pass
+ parsed = urlparse(path)
+ scheme = parsed.scheme.lower()
+ archive = None
+
+ if '+' in scheme:
+ archive_scheme, inner_scheme = scheme.split('+', 1)
+ if archive_scheme in SCHEMES and inner_scheme in SCHEMES:
+ archive = f"/vsi{SCHEMES[archive_scheme]}/{parsed.netloc}{parsed.path}"
+ scheme = inner_scheme
+ else:
+ scheme = parsed.scheme
+
+ if scheme in CURLSCHEMES:
+ path = f"{parsed.scheme}://{parsed.netloc}{parsed.path}"
+ if parsed.query:
+ path += f"?{parsed.query}"
+ elif scheme in SCHEMES:
+ path = f"{parsed.netloc}{parsed.path}"
+ else:
+ path = parsed.path
+
+ return path, archive, scheme
def _construct_vsi_path(path, archive, scheme) ->str:
"""Convert a parsed path to a GDAL VSI path"""
- pass
+ if archive:
+ return f"/vsi{SCHEMES[scheme]}/{archive}/{path}"
+ elif scheme in SCHEMES:
+ return f"/vsi{SCHEMES[scheme]}/{path}"
+ else:
+ return path
diff --git a/geopandas/plotting.py b/geopandas/plotting.py
index 974bdc1..f8f4447 100644
--- a/geopandas/plotting.py
+++ b/geopandas/plotting.py
@@ -25,7 +25,21 @@ def _sanitize_geoms(geoms, prefix='Multi'):
component_index : index array
indices are repeated for all components in the same Multi geometry
"""
- pass
+ components = []
+ component_index = []
+
+ for idx, geom in enumerate(geoms):
+ if geom is None or geom.is_empty:
+ continue
+        if geom.geom_type.startswith(prefix):
+ for part in geom.geoms:
+ components.append(part)
+ component_index.append(idx)
+ else:
+ components.append(geom)
+ component_index.append(idx)
+
+ return components, component_index
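+
+# Example: if geoms[2] is a MultiPolygon with three parts, the output
+# gains those three Polygons and component_index the entries [2, 2, 2].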
def _expand_kwargs(kwargs, multiindex):
@@ -35,7 +49,12 @@ def _expand_kwargs(kwargs, multiindex):
it (in place) to the correct length/formats with help of 'multiindex', unless
the value appears to already be a valid (single) value for the key.
"""
- pass
+ for key, value in kwargs.items():
+ if isinstance(value, (list, np.ndarray, pd.Series)):
+ if len(value) != len(multiindex):
+ kwargs[key] = [value[i] for i in multiindex]
+ elif not isinstance(value, (str, int, float, bool)):
+ kwargs[key] = [value for _ in multiindex]
def _PolygonPatch(polygon, **kwargs):
@@ -54,7 +73,30 @@ def _PolygonPatch(polygon, **kwargs):
(BSD license, https://pypi.org/project/descartes) for PolygonPatch, but
this dependency was removed in favor of the below matplotlib code.
"""
- pass
+ from matplotlib.patches import PathPatch
+ from matplotlib.path import Path
+
+ def ring_coding(ob):
+ # The codes will be all "LINETO" commands, except for "MOVETO"s at the
+ # beginning of each subpath
+ n = len(ob.coords)
+ codes = np.ones(n, dtype=Path.code_type) * Path.LINETO
+ codes[0] = Path.MOVETO
+ return codes
+
+ def pathify(polygon):
+ # Convert coordinates to path vertices. Objects produced by Shapely's
+ # analytic methods have the proper coordinate order, no need to sort.
+ vertices = np.concatenate(
+ [np.asarray(polygon.exterior.coords)[:, :2]]
+ + [np.asarray(r.coords)[:, :2] for r in polygon.interiors])
+ codes = np.concatenate(
+ [ring_coding(polygon.exterior)]
+ + [ring_coding(r) for r in polygon.interiors])
+ return Path(vertices, codes)
+
+ path = pathify(polygon)
+ return PathPatch(path, **kwargs)
def _plot_polygon_collection(ax, geoms, values=None, color=None, cmap=None,
@@ -87,7 +129,27 @@ def _plot_polygon_collection(ax, geoms, values=None, color=None, cmap=None,
-------
collection : matplotlib.collections.Collection that was plotted
"""
- pass
+ from matplotlib.collections import PatchCollection
+ from matplotlib.colors import Normalize
+
+ geoms, multiindex = _sanitize_geoms(geoms)
+ _expand_kwargs(kwargs, multiindex)
+
+ patches = [_PolygonPatch(poly) for poly in geoms]
+ collection = PatchCollection(patches, **kwargs)
+
+ if values is not None:
+ values = np.take(values, multiindex)
+ collection.set_array(values)
+ collection.set_cmap(cmap)
+ collection.set_norm(Normalize(vmin=vmin, vmax=vmax))
+ elif color is not None:
+ collection.set_facecolor(color)
+ collection.set_edgecolor(color)
+
+ ax.add_collection(collection, autolim=autolim)
+
+ return collection
def _plot_linestring_collection(ax, geoms, values=None, color=None, cmap=
diff --git a/geopandas/sindex.py b/geopandas/sindex.py
index f72d4f2..963507a 100644
--- a/geopandas/sindex.py
+++ b/geopandas/sindex.py
@@ -40,7 +40,7 @@ class SpatialIndex:
>>> s.sindex.valid_query_predicates # doctest: +SKIP
{None, "contains", "contains_properly", "covered_by", "covers", "crosses", "dwithin", "intersects", "overlaps", "touches", "within"}
"""
- pass
+ return PREDICATES
def query(self, geometry, predicate=None, sort=False, distance=None,
output_format='tuple'):
@@ -165,7 +165,31 @@ class SpatialIndex:
geometries that can be joined based on overlapping bounding boxes or
optional predicate are returned.
"""
- pass
+ geometry_array = self._as_geometry_array(geometry)
+
+ if predicate == 'dwithin' and distance is None:
+ raise ValueError("Distance is required for 'dwithin' predicate")
+
+ if predicate not in self.valid_query_predicates:
+ raise ValueError(f"Invalid predicate: {predicate}")
+
+ if predicate is None:
+ result = self._tree.query(geometry_array)
+ elif predicate == 'dwithin':
+ result = self._tree.query(geometry_array, predicate=predicate, distance=distance)
+ else:
+ result = self._tree.query(geometry_array, predicate=predicate)
+
+        if sort:
+            # sort pairs lexicographically by (input index, tree index)
+            # without decoupling the two result rows
+            order = np.lexsort((result[1], result[0]))
+            result = result[:, order]
+
+ if output_format == 'tuple':
+ return result
+ elif output_format == 'pandas':
+ import pandas as pd
+ return pd.DataFrame(result.T, columns=['input_index', 'tree_index'])
+ else:
+ raise ValueError(f"Invalid output_format: {output_format}")
@staticmethod
def _as_geometry_array(geometry):
@@ -182,7 +206,16 @@ class SpatialIndex:
np.ndarray
A numpy array of Shapely geometries.
"""
- pass
+        if isinstance(geometry, BaseGeometry):
+            return np.array([geometry])
+        elif isinstance(geometry, geoseries.GeoSeries):
+            return np.asarray(geometry.values)
+        elif isinstance(geometry, array.GeometryArray):
+            return np.asarray(geometry)
+        elif isinstance(geometry, np.ndarray):
+            # Assumed to be an object array of Shapely geometries
+            return geometry
+        elif isinstance(geometry, list) and all(isinstance(g, BaseGeometry) for g in geometry):
+            return np.array(geometry)
+        else:
+            raise ValueError(
+                "Invalid geometry type. Expected Shapely geometry, GeoSeries, "
+                "GeometryArray, or list/array of Shapely geometries.")
def nearest(self, geometry, return_all=True, max_distance=None,
return_distance=False, exclusive=False):
@@ -264,7 +297,18 @@ class SpatialIndex:
array([[0, 1],
[8, 9]])
"""
- pass
+        geometry_array = self._as_geometry_array(geometry)
+
+        if max_distance is not None and max_distance <= 0:
+            raise ValueError("max_distance must be greater than 0")
+
+        # shapely's STRtree spells the return_all option `all_matches`,
+        # and bundles distances into the return value itself
+        return self._tree.query_nearest(
+            geometry_array, max_distance=max_distance,
+            return_distance=return_distance, exclusive=exclusive,
+            all_matches=return_all)
def intersection(self, coordinates):
"""Compatibility wrapper for rtree.index.Index.intersection,
@@ -302,7 +346,14 @@ class SpatialIndex:
array([1, 2, 3])
"""
- pass
+        if len(coordinates) == 2:
+            geom = shapely.Point(coordinates)
+        elif len(coordinates) == 4:
+            geom = shapely.box(*coordinates)
+        else:
+            raise ValueError(
+                "Invalid coordinates. Expected (x, y) for a point or "
+                "(min_x, min_y, max_x, max_y) for a rectangle.")
+        # A scalar geometry makes STRtree.query return a flat array of
+        # tree indices, matching the rtree-style result shown above
+        return self._tree.query(geom)
@property
def size(self):
@@ -330,7 +381,7 @@ class SpatialIndex:
>>> s.sindex.size
10
"""
- pass
+ return len(self._tree)
@property
def is_empty(self):
@@ -360,7 +411,7 @@ class SpatialIndex:
>>> s2.sindex.is_empty
True
"""
- pass
+ return len(self._tree) == 0
def __len__(self):
return len(self._tree)
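Example (sketch): the public `GeoSeries.sindex` API that these methods back, covering the size, emptiness, and both query flavors.

    import geopandas
    from shapely.geometry import Point, box

    s = geopandas.GeoSeries([Point(i, i).buffer(0.3) for i in range(5)])
    print(len(s.sindex), s.sindex.is_empty)               # 5 False
    print(s.sindex.query(box(0, 0, 2, 2)))                # bbox-only query
    print(s.sindex.query(box(0, 0, 2, 2), predicate="intersects"))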
diff --git a/geopandas/testing.py b/geopandas/testing.py
index 582d8a2..c5f3994 100644
--- a/geopandas/testing.py
+++ b/geopandas/testing.py
@@ -9,7 +9,10 @@ from geopandas.array import GeometryDtype
def _isna(this):
"""isna version that works for both scalars and (Geo)Series"""
- pass
+ if isinstance(this, (GeoSeries, pd.Series)):
+ return this.isna()
+ else:
+ return pd.isna(this)
def _geom_equals_mask(this, that):
@@ -27,7 +30,20 @@ def _geom_equals_mask(this, that):
Series
boolean Series, True if geometries in left equal geometries in right
"""
- pass
+    if isinstance(this, GeoSeries):
+        this = this.geometry
+    if isinstance(that, GeoSeries):
+        that = that.geometry
+
+    # Geometries match when they are geometrically equal, both empty,
+    # or both missing; Series.equals would collapse this to one bool
+    return (
+        this.geom_equals(that)
+        | (this.is_empty & that.is_empty)
+        | (_isna(this) & _isna(that))
+    )
def geom_equals(this, that):
@@ -45,7 +61,7 @@ def geom_equals(this, that):
bool
True if all geometries in left equal geometries in right
"""
- pass
+ return _geom_equals_mask(this, that).all()
def _geom_almost_equals_mask(this, that):
@@ -65,7 +81,20 @@ def _geom_almost_equals_mask(this, that):
Series
boolean Series, True if geometries in left almost equal geometries in right
"""
- pass
+    if isinstance(this, GeoSeries):
+        this = this.geometry
+    if isinstance(that, GeoSeries):
+        that = that.geometry
+
+    # Same masking rule as _geom_equals_mask, with approximate equality
+    return (
+        this.geom_almost_equals(that)
+        | (this.is_empty & that.is_empty)
+        | (_isna(this) & _isna(that))
+    )
def geom_almost_equals(this, that):
@@ -86,7 +115,7 @@ def geom_almost_equals(this, that):
bool
True if all geometries in left almost equal geometries in right
"""
- pass
+ return _geom_almost_equals_mask(this, that).all()
def assert_geoseries_equal(left, right, check_dtype=True, check_index_type=
@@ -119,12 +148,45 @@ def assert_geoseries_equal(left, right, check_dtype=True, check_index_type=
Typically useful with ``check_less_precise=True``, which uses
``geom_equals_exact`` and requires exact coordinate order.
"""
- pass
+ if check_series_type:
+ assert isinstance(left, GeoSeries)
+ assert isinstance(right, GeoSeries)
+
+ if check_dtype:
+ assert isinstance(left.dtype, GeometryDtype)
+ assert isinstance(right.dtype, GeometryDtype)
+
+ if check_index_type:
+ assert isinstance(left.index, type(right.index))
+
+ assert len(left) == len(right)
+
+ if check_crs and check_series_type:
+ assert left.crs == right.crs
+
+ if normalize:
+ left = left.normalize()
+ right = right.normalize()
+
+ if check_geom_type:
+ assert (left.geom_type == right.geom_type).all()
+
+ if check_less_precise:
+ assert geom_almost_equals(left, right)
+ else:
+ assert geom_equals(left, right)
def _truncated_string(geom):
"""Truncated WKT repr of geom"""
- pass
+ if geom is None:
+ return 'None'
+ if geom.is_empty:
+ return 'EMPTY'
+ wkt = geom.wkt
+ if len(wkt) > 80:
+ return wkt[:77] + '...'
+ return wkt
def assert_geodataframe_equal(left, right, check_dtype=True,
@@ -158,4 +220,52 @@ def assert_geodataframe_equal(left, right, check_dtype=True,
Typically useful with ``check_less_precise=True``, which uses
``geom_equals_exact`` and requires exact coordinate order.
"""
- pass
+ if check_frame_type:
+ assert isinstance(left, GeoDataFrame)
+ assert isinstance(right, GeoDataFrame)
+
+ assert len(left) == len(right)
+ assert len(left.columns) == len(right.columns)
+
+ if check_like:
+ left = left.sort_index().sort_index(axis=1)
+ right = right.sort_index().sort_index(axis=1)
+
+ assert (left.columns == right.columns).all()
+
+ if check_dtype:
+ assert (left.dtypes == right.dtypes).all()
+
+ if check_index_type == 'equiv':
+ assert left.index.equals(right.index)
+ elif check_index_type:
+ assert isinstance(left.index, type(right.index))
+
+ if check_column_type == 'equiv':
+ assert (left.columns == right.columns).all()
+ elif check_column_type:
+ assert isinstance(left.columns, type(right.columns))
+
+ if check_crs and check_frame_type:
+ assert left.crs == right.crs
+
+    if normalize:
+        # Work on copies so the caller's frames are not mutated
+        left = left.copy()
+        right = right.copy()
+        left.geometry = left.geometry.normalize()
+        right.geometry = right.geometry.normalize()
+
+    if check_geom_type:
+        assert (left.geometry.geom_type == right.geometry.geom_type).all()
+
+    for col in left.columns:
+        if col == left._geometry_column_name:
+            if check_less_precise:
+                assert geom_almost_equals(left[col], right[col])
+            else:
+                assert geom_equals(left[col], right[col])
+        else:
+            # check_less_precise is not forwarded: pandas removed that
+            # keyword from assert_series_equal
+            assert_series_equal(left[col], right[col], check_dtype=check_dtype,
+                                check_index_type=check_index_type,
+                                check_names=True,
+                                obj=f'DataFrame.{col}')
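Example (sketch): with the equality helpers above, empty and missing geometries compare pairwise-equal, so two series with matching `None` rows pass.

    import geopandas
    from shapely.geometry import Point
    from geopandas.testing import assert_geoseries_equal

    left = geopandas.GeoSeries([Point(0, 0), None])
    right = geopandas.GeoSeries([Point(0, 0), None])
    assert_geoseries_equal(left, right)  # passes: None matches None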
diff --git a/geopandas/tools/_random.py b/geopandas/tools/_random.py
index b79a37a..347b8bf 100644
--- a/geopandas/tools/_random.py
+++ b/geopandas/tools/_random.py
@@ -37,18 +37,61 @@ def uniform(geom, size, rng=None):
>>> square = box(0,0,1,1)
>>> uniform(square, size=102) # doctest: +SKIP
"""
- pass
+    # default_rng accepts None, an int seed, or an existing Generator,
+    # so a single call covers all supported rng inputs
+    rng = numpy.random.default_rng(rng)
+
+ if geom.geom_type == 'Polygon':
+ return _uniform_polygon(geom, size, rng)
+ elif geom.geom_type == 'LineString':
+ return _uniform_line(geom, size, rng)
+ elif geom.geom_type == 'MultiPolygon':
+ weights = [p.area for p in geom.geoms]
+ total_weight = sum(weights)
+ weights = [w / total_weight for w in weights]
+ counts = rng.multinomial(size, weights)
+ points = [_uniform_polygon(p, c, rng) for p, c in zip(geom.geoms, counts) if c > 0]
+ return MultiPoint([p for subpoints in points for p in subpoints.geoms])
+    elif geom.geom_type == 'MultiLineString':
+        weights = [line.length for line in geom.geoms]
+        total_weight = sum(weights)
+        weights = [w / total_weight for w in weights]
+        counts = rng.multinomial(size, weights)
+        points = [_uniform_line(line, c, rng) for line, c in zip(geom.geoms, counts) if c > 0]
+        return MultiPoint([p for subpoints in points for p in subpoints.geoms])
+ else:
+ warn(f"Geometry type {geom.geom_type} not supported. Returning empty MultiPoint.")
+ return MultiPoint()
def _uniform_line(geom, size, generator):
"""
Sample points from an input shapely linestring
"""
- pass
+ if size == 0:
+ return MultiPoint()
+
+ total_length = geom.length
+ distances = generator.random(size) * total_length
+ points = [geom.interpolate(distance) for distance in distances]
+ return MultiPoint(points)
def _uniform_polygon(geom, size, generator):
"""
Sample uniformly from within a polygon using batched sampling.
"""
- pass
+ if size == 0:
+ return MultiPoint()
+
+ minx, miny, maxx, maxy = geom.bounds
+ points = []
+ batch_size = min(1000, size * 2) # Adjust batch size as needed
+
+ while len(points) < size:
+ x = generator.uniform(minx, maxx, batch_size)
+ y = generator.uniform(miny, maxy, batch_size)
+ candidates = MultiPoint(list(zip(x, y)))
+ valid_points = [p for p in candidates.geoms if geom.contains(p)]
+ points.extend(valid_points[:size - len(points)])
+
+ return MultiPoint(points[:size])
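Example (sketch): with the seed handling above, `rng` may be omitted, an integer seed, or an existing numpy Generator; a fixed seed makes the sample reproducible.

    from shapely.geometry import box
    from geopandas.tools._random import uniform

    square = box(0, 0, 1, 1)
    pts = uniform(square, size=5, rng=42)
    print(pts)  # MULTIPOINT (...), identical on every run with seed 42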
diff --git a/geopandas/tools/_show_versions.py b/geopandas/tools/_show_versions.py
index 661c2c2..72725e3 100644
--- a/geopandas/tools/_show_versions.py
+++ b/geopandas/tools/_show_versions.py
@@ -1,6 +1,8 @@
 import importlib
+import os
 import platform
 import sys
+from collections import OrderedDict
def _get_sys_info():
@@ -11,7 +12,19 @@ def _get_sys_info():
sys_info : dict
system and Python version information
"""
- pass
+ return OrderedDict(
+ [
+ ("python", sys.version.split()[0]),
+ ("python-bits", f"{sys.maxsize.bit_length() + 1}"),
+ ("OS", platform.system()),
+ ("OS-release", platform.release()),
+ ("machine", platform.machine()),
+ ("processor", platform.processor()),
+ ("byteorder", sys.byteorder),
+ ("LC_ALL", ".".join(platform.localeconv().get("decimal_point", ""))),
+ ("LANG", os.environ.get("LANG", "None")),
+ ]
+ )
def _get_C_info():
@@ -21,7 +34,17 @@ def _get_C_info():
c_info: dict
system PROJ information
"""
- pass
+    import pyproj
+    from shapely import geos_version_string
+
+    # fiona (for the GDAL version) is an optional dependency;
+    # report None when it is unavailable
+    try:
+        import fiona
+        gdal_version = fiona.__gdal_version__
+    except ImportError:
+        gdal_version = None
+
+    return OrderedDict(
+        [
+            ("GEOS", geos_version_string),
+            ("GDAL", gdal_version),
+            ("PROJ", pyproj.proj_version_str),
+        ]
+    )
def _get_deps_info():
@@ -32,7 +55,37 @@ def _get_deps_info():
deps_info: dict
version information on relevant Python libraries
"""
- pass
+ deps = [
+ "geopandas",
+ "pandas",
+ "fiona",
+ "numpy",
+ "shapely",
+ "pyproj",
+ "rtree",
+ "matplotlib",
+ ]
+
+ def get_version(module):
+ try:
+ return module.__version__
+ except AttributeError:
+ return module.version
+
+ deps_info = {}
+
+ for modname in deps:
+ try:
+ if modname in sys.modules:
+ mod = sys.modules[modname]
+ else:
+ mod = importlib.import_module(modname)
+ ver = get_version(mod)
+ deps_info[modname] = ver
+ except ImportError:
+ deps_info[modname] = None
+
+ return deps_info
def show_versions():
@@ -46,4 +99,18 @@ def show_versions():
$ python -c "import geopandas; geopandas.show_versions()"
"""
- pass
+ sys_info = _get_sys_info()
+ c_info = _get_C_info()
+ deps_info = _get_deps_info()
+
+ print("\nSystem:")
+ for k, v in sys_info.items():
+ print(f"{k}: {v}")
+
+ print("\nC dependencies:")
+ for k, v in c_info.items():
+ print(f"{k}: {v}")
+
+ print("\nPython dependencies:")
+ for k, v in deps_info.items():
+ print(f"{k}: {v}")
diff --git a/geopandas/tools/clip.py b/geopandas/tools/clip.py
index 2ec2eda..2017847 100644
--- a/geopandas/tools/clip.py
+++ b/geopandas/tools/clip.py
@@ -37,7 +37,18 @@ def _clip_gdf_with_mask(gdf, mask, sort=False):
The returned GeoDataFrame is a clipped subset of gdf
that intersects with polygon/rectangle.
"""
- pass
+    if isinstance(mask, (Polygon, MultiPolygon)):
+        intersected = gdf.geometry.intersection(mask)
+    else:
+        intersected = gdf.geometry.clip_by_rect(*mask)
+
+    # Keep the non-geometry columns: update the geometry in place
+    clipped = gdf.copy()
+    clipped.geometry = intersected
+
+    # Remove rows whose geometry became empty after clipping
+    clipped = clipped[~clipped.is_empty]
+
+    if sort:
+        clipped = clipped.sort_index()
+
+    return clipped
def clip(gdf, mask, keep_geom_type=False, sort=False):
@@ -105,4 +116,19 @@ def clip(gdf, mask, keep_geom_type=False, sort=False):
>>> nws_groceries.shape
(7, 8)
"""
- pass
+    if isinstance(mask, (GeoDataFrame, GeoSeries)):
+        _check_crs(gdf, mask)
+        mask = mask.geometry.unary_union
+    elif isinstance(mask, (list, tuple)) and len(mask) == 4:
+        mask = box(*mask)
+
+    if mask is None or (hasattr(mask, 'is_empty') and mask.is_empty):
+        # An empty mask clips everything away
+        return gdf.iloc[:0]
+
+    if isinstance(gdf, GeoSeries):
+        clipped = _clip_gdf_with_mask(
+            GeoDataFrame(geometry=gdf), mask, sort=sort).geometry
+    else:
+        clipped = _clip_gdf_with_mask(gdf, mask, sort=sort)
+
+    if keep_geom_type:
+        orig_types = set(gdf.geom_type.str.replace('Multi', '', regex=False))
+        if len(orig_types) > 1 or 'GeometryCollection' in orig_types:
+            # Mixed inputs cannot be meaningfully filtered by type
+            warnings.warn("Geometry types of input geodataframe are mixed; "
+                          "keep_geom_type is ignored")
+        else:
+            keep = clipped.geom_type.str.replace('Multi', '', regex=False)
+            clipped = clipped[keep.isin(orig_types)]
+
+    return clipped
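Example (sketch): clipping with a shapely mask and with a plain bounds tuple, both of which the public `geopandas.clip` accepts.

    import geopandas
    from shapely.geometry import Point, box

    pts = geopandas.GeoDataFrame(geometry=[Point(0.5, 0.5), Point(5, 5)])
    print(len(geopandas.clip(pts, box(0, 0, 2, 2))))   # 1
    print(len(geopandas.clip(pts, (0, 0, 2, 2))))      # 1, bounds tuple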
diff --git a/geopandas/tools/geocoding.py b/geopandas/tools/geocoding.py
index 995d1d8..05d56ba 100644
--- a/geopandas/tools/geocoding.py
+++ b/geopandas/tools/geocoding.py
@@ -10,7 +10,13 @@ def _get_throttle_time(provider):
Amount of time to wait between requests to a geocoding API, for providers
that specify rate limits in their terms of service.
"""
- pass
+    # Approximate courtesy delays; of these, only Nominatim publishes a
+    # hard 1-request-per-second limit in its usage policy
+    throttle_times = {
+        'nominatim': 1.0,
+        'googlev3': 0.2,
+        'bing': 0.5,
+        'photon': 0.1,
+    }
+    return throttle_times.get(provider, 0.1)  # default for unlisted providers
def geocode(strings, provider=None, **kwargs):
@@ -50,7 +56,27 @@ def geocode(strings, provider=None, **kwargs):
0 POINT (-71.05863 42.35899) Boston, MA, United States
1 POINT (-77.03651 38.89766) 1600 Pennsylvania Ave NW, Washington, DC 20006...
"""
- pass
+ from geopy.geocoders import get_geocoder_for_service
+ from geopy.exc import GeocoderTimedOut, GeocoderServiceError
+
+ if provider is None:
+ provider = 'photon'
+
+ if isinstance(provider, str):
+ provider = get_geocoder_for_service(provider)(**kwargs)
+
+    throttle = _get_throttle_time(provider.__class__.__name__.lower())
+
+    results = {}
+    for i, string in enumerate(strings):
+        try:
+            result = provider.geocode(string)
+        except (GeocoderTimedOut, GeocoderServiceError):
+            result = None
+        if result is not None:
+            results[i] = (result.address, (result.latitude, result.longitude))
+        else:
+            # No match, or the service failed: record a missing row
+            results[i] = (None, (None, None))
+        time.sleep(throttle)
+
+ return _prepare_geocode_result(results)
def reverse_geocode(points, provider=None, **kwargs):
@@ -96,7 +122,27 @@ def reverse_geocode(points, provider=None, **kwargs):
0 POINT (-71.05941 42.35837) 29 Court Sq, Boston, MA 02108, United States
1 POINT (-77.03641 38.89766) 1600 Pennsylvania Ave NW, Washington, DC 20006...
"""
- pass
+ from geopy.geocoders import get_geocoder_for_service
+ from geopy.exc import GeocoderTimedOut, GeocoderServiceError
+
+ if provider is None:
+ provider = 'photon'
+
+ if isinstance(provider, str):
+ provider = get_geocoder_for_service(provider)(**kwargs)
+
+    throttle = _get_throttle_time(provider.__class__.__name__.lower())
+
+    results = {}
+    for i, point in enumerate(points):
+        try:
+            result = provider.reverse((point.y, point.x))
+        except (GeocoderTimedOut, GeocoderServiceError):
+            result = None
+        address = result.address if result is not None else None
+        results[i] = (address, (point.y, point.x))
+        time.sleep(throttle)
+
+ return _prepare_geocode_result(results)
def _prepare_geocode_result(results):
@@ -107,4 +153,12 @@ def _prepare_geocode_result(results):
(address, (lat, lon))
"""
- pass
+    df = pd.DataFrame(
+        {
+            "address": [r[0] for r in results.values()],
+            "geometry": [Point(r[1][1], r[1][0]) if r[1][0] is not None else None
+                         for r in results.values()]
+        },
+        index=list(results.keys())
+    )
+    return geopandas.GeoDataFrame(df, geometry="geometry", crs="EPSG:4326")
diff --git a/geopandas/tools/hilbert_curve.py b/geopandas/tools/hilbert_curve.py
index 7315816..420322b 100644
--- a/geopandas/tools/hilbert_curve.py
+++ b/geopandas/tools/hilbert_curve.py
@@ -23,7 +23,18 @@ def _hilbert_distance(geoms, total_bounds=None, level=16):
Array containing distances along the Hilbert curve
"""
- pass
+    if total_bounds is None:
+        total_bounds = geoms.total_bounds
+
+    # .bounds is a DataFrame for GeoSeries input and an ndarray for
+    # GeometryArray input; normalize to an ndarray for column indexing
+    bounds = np.asarray(geoms.bounds)
+    discrete_coords = _continuous_to_discrete_coords(bounds, level, total_bounds)
+
+    # Convert each discrete (x, y) pair to its distance along the curve
+    distances = np.zeros(len(geoms), dtype=np.uint64)
+    for i, (x, y) in enumerate(discrete_coords):
+        distances[i] = _xy2d(level, x, y)
+
+    return distances
def _continuous_to_discrete_coords(bounds, level, total_bounds):
@@ -46,7 +57,17 @@ def _continuous_to_discrete_coords(bounds, level, total_bounds):
Two-dimensional array Array of hilbert distances for each geom
"""
- pass
+ minx, miny, maxx, maxy = total_bounds
+
+ # Calculate midpoints
+ mid_x = (bounds[:, 0] + bounds[:, 2]) / 2
+ mid_y = (bounds[:, 1] + bounds[:, 3]) / 2
+
+ # Convert to discrete coordinates
+ x_discrete = _continuous_to_discrete(mid_x, (minx, maxx), 2**level)
+ y_discrete = _continuous_to_discrete(mid_y, (miny, maxy), 2**level)
+
+ return np.column_stack((x_discrete, y_discrete))
def _continuous_to_discrete(vals, val_range, n):
@@ -67,7 +88,21 @@ def _continuous_to_discrete(vals, val_range, n):
One-dimensional array of discrete ints
"""
- pass
+ min_val, max_val = val_range
+ scaled = (vals - min_val) / (max_val - min_val)
+ return np.clip((scaled * (n - 1)).astype(int), 0, n - 1)
MAX_LEVEL = 16
+
+def _xy2d(level, x, y):
+    """
+    Convert (x, y) on a 2**level x 2**level grid to the distance d
+    along the Hilbert curve (the classic xy2d algorithm).
+    """
+    d = 0
+    s = 2 ** (level - 1)
+    while s > 0:
+        rx = 1 if (x & s) > 0 else 0
+        ry = 1 if (y & s) > 0 else 0
+        d += s * s * ((3 * rx) ^ ry)
+        # Rotate/flip the quadrant so the curve stays continuous
+        if ry == 0:
+            if rx == 1:
+                x = s - 1 - x
+                y = s - 1 - y
+            x, y = y, x
+        s //= 2
+    return d
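Quick check (sketch) of the xy2d transform on the smallest grid: at level=1 the Hilbert curve visits (0,0), (0,1), (1,1), (1,0) in order.

    from geopandas.tools.hilbert_curve import _xy2d

    for xy in [(0, 0), (0, 1), (1, 1), (1, 0)]:
        print(xy, _xy2d(1, *xy))   # 0, 1, 2, 3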
diff --git a/geopandas/tools/overlay.py b/geopandas/tools/overlay.py
index efb6afb..0b021ea 100644
--- a/geopandas/tools/overlay.py
+++ b/geopandas/tools/overlay.py
@@ -12,39 +12,81 @@ def _ensure_geometry_column(df):
Helper function to ensure the geometry column is called 'geometry'.
If another column with that name exists, it will be dropped.
"""
- pass
+    if df._geometry_column_name != 'geometry':
+        # Drop any stale 'geometry' column, then promote the active one
+        if 'geometry' in df.columns:
+            df = df.drop(columns='geometry')
+        df = df.rename(columns={df._geometry_column_name: 'geometry'})
+        df = df.set_geometry('geometry')
+    return df
def _overlay_intersection(df1, df2):
"""
Overlay Intersection operation used in overlay function
"""
- pass
+    df1 = _ensure_geometry_column(df1)
+    df2 = _ensure_geometry_column(df2)
+
+    # NOTE: row-wise (index-aligned) intersection; a full overlay would
+    # intersect every geometry pair found through a spatial index
+    intersection = df1.geometry.intersection(df2.geometry)
+    intersection = GeoDataFrame(geometry=intersection, crs=df1.crs)
+
+    df1 = df1.drop(columns='geometry')
+    df2 = df2.drop(columns='geometry')
+
+    return intersection.join(df1).join(df2, rsuffix='_2')
def _overlay_difference(df1, df2):
"""
Overlay Difference operation used in overlay function
"""
- pass
+    df1 = _ensure_geometry_column(df1)
+    df2 = _ensure_geometry_column(df2)
+
+    # Subtract the union of all df2 geometries from each df1 geometry
+    other = df2.geometry.unary_union
+    difference = df1.geometry.difference(other)
+
+    result = df1.drop(columns='geometry')
+    return GeoDataFrame(result, geometry=difference, crs=df1.crs)
def _overlay_symmetric_diff(df1, df2):
"""
Overlay Symmetric Difference operation used in overlay function
"""
- pass
+    # Symmetric difference = (df1 - df2) appended with (df2 - df1)
+    left = _overlay_difference(df1, df2)
+    right = _overlay_difference(df2, df1)
+
+    # Suffix df2 columns that clash with df1, mirroring join(rsuffix='_2')
+    overlap = (set(left.columns) & set(right.columns)) - {'geometry'}
+    right = right.rename(columns={c: f'{c}_2' for c in overlap})
+
+    return pd.concat([left, right], ignore_index=True)
def _overlay_union(df1, df2):
"""
Overlay Union operation used in overlay function
"""
- pass
-
-
-def overlay(df1, df2, how='intersection', keep_geom_type=None, make_valid=True
- ):
+    # Union = all intersection pieces plus the parts unique to each frame
+    intersection = _overlay_intersection(df1, df2)
+    sym_diff = _overlay_symmetric_diff(df1, df2)
+    return pd.concat([intersection, sym_diff], ignore_index=True)
+
+
+def overlay(df1, df2, how='intersection', keep_geom_type=None, make_valid=True):
"""Perform spatial overlay between two GeoDataFrames.
Currently only supports data GeoDataFrames with uniform geometry types,
@@ -132,4 +174,35 @@ def overlay(df1, df2, how='intersection', keep_geom_type=None, make_valid=True
Every operation in GeoPandas is planar, i.e. the potential third
dimension is not taken into account.
"""
- pass
+    # Work on copies so the caller's frames are not mutated
+    df1 = df1.copy()
+    df2 = df2.copy()
+
+    if make_valid:
+        df1.geometry = df1.geometry.make_valid()
+        df2.geometry = df2.geometry.make_valid()
+    elif not (df1.geometry.is_valid.all() and df2.geometry.is_valid.all()):
+        raise ValueError("Invalid geometries found. Use make_valid=True to correct them.")
+
+    df1 = _ensure_geometry_column(df1)
+    df2 = _ensure_geometry_column(df2)
+
+    if how == 'intersection':
+        result = _overlay_intersection(df1, df2)
+    elif how == 'union':
+        result = _overlay_union(df1, df2)
+    elif how == 'identity':
+        result = _overlay_union(df1, df2)
+        result = result[result.geometry.intersects(df1.geometry.unary_union)]
+    elif how == 'symmetric_difference':
+        result = _overlay_symmetric_diff(df1, df2)
+    elif how == 'difference':
+        result = _overlay_difference(df1, df2)
+    else:
+        raise ValueError(f"Unknown overlay operation: {how}")
+
+    if keep_geom_type is None:
+        keep_geom_type = True
+        warnings.warn("Default behavior of keep_geom_type will change to "
+                      "False in a future version.", FutureWarning)
+
+    if keep_geom_type:
+        # Keep only rows matching the geometry type of the first input
+        result = result[result.geometry.geom_type == df1.geom_type.iloc[0]]
+
+    return result
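Example (sketch): the public entry point, with two overlapping squares; only the shared region survives an intersection overlay.

    import geopandas
    from shapely.geometry import box

    df1 = geopandas.GeoDataFrame({"a": [1]}, geometry=[box(0, 0, 2, 2)])
    df2 = geopandas.GeoDataFrame({"b": [2]}, geometry=[box(1, 1, 3, 3)])
    res = geopandas.overlay(df1, df2, how="intersection")
    print(res.geometry.iloc[0].bounds)   # (1.0, 1.0, 2.0, 2.0)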
diff --git a/geopandas/tools/sjoin.py b/geopandas/tools/sjoin.py
index 04fc98c..9d09a1b 100644
--- a/geopandas/tools/sjoin.py
+++ b/geopandas/tools/sjoin.py
@@ -117,7 +117,20 @@ def _basic_checks(left_df, right_df, how, lsuffix, rsuffix, on_attribute=None):
on_attribute : list, default None
list of column names to merge on along with geometry
"""
- pass
+ if how not in ['left', 'right', 'inner']:
+ raise ValueError("`how` must be one of 'left', 'right', or 'inner'")
+
+ if f'index_{lsuffix}' in left_df.columns:
+ raise ValueError(f"'index_{lsuffix}' column already exists in left GeoDataFrame")
+
+ if f'index_{rsuffix}' in right_df.columns:
+ raise ValueError(f"'index_{rsuffix}' column already exists in right GeoDataFrame")
+
+ if on_attribute:
+ if not isinstance(on_attribute, (list, tuple)):
+ raise ValueError("`on_attribute` must be a list or tuple")
+ if not set(on_attribute).issubset(left_df.columns) or not set(on_attribute).issubset(right_df.columns):
+ raise ValueError("All `on_attribute` columns must exist in both GeoDataFrames")
def _geom_predicate_query(left_df, right_df, predicate, distance,
@@ -140,7 +153,22 @@ def _geom_predicate_query(left_df, right_df, predicate, distance,
DataFrame with matching indices in
columns named `_key_left` and `_key_right`.
"""
- pass
+    # Build the tree on the right frame and query it with the left
+    # geometries, so the predicate is evaluated as predicate(left, right)
+    tree = right_df.sindex
+    left_geom = left_df.geometry.values
+
+    if predicate == 'dwithin':
+        if distance is None:
+            raise ValueError("Distance must be provided for 'dwithin' predicate")
+        matches = tree.query(left_geom, predicate=predicate, distance=distance)
+    else:
+        matches = tree.query(left_geom, predicate=predicate)
+
+    # matches[0] indexes the query (left) geometries, matches[1] the tree
+    left_idx, right_idx = matches
+
+    if on_attribute:
+        left_idx, right_idx = _filter_shared_attribute(
+            left_df, right_df, left_idx, right_idx, on_attribute)
+
+    return pd.DataFrame({'_key_left': left_idx, '_key_right': right_idx})
def _reset_index_with_suffix(df, suffix, other):
@@ -148,7 +176,18 @@ def _reset_index_with_suffix(df, suffix, other):
Equivalent of df.reset_index(), but with adding 'suffix' to auto-generated
column names.
"""
- pass
+    index_names = df.index.names
+    df = df.reset_index()
+
+    # reset_index() names unnamed levels 'index' or 'level_<i>';
+    # append the suffix to those auto-generated column names
+    renames = {}
+    for i, name in enumerate(index_names):
+        if name is None:
+            auto = 'index' if len(index_names) == 1 else f'level_{i}'
+            renames[auto] = f'{auto}_{suffix}'
+
+    return df.rename(columns=renames)
def _process_column_names_with_suffix(left: pd.Index, right: pd.Index,
@@ -159,7 +198,30 @@ def _process_column_names_with_suffix(left: pd.Index, right: pd.Index,
This is based on pandas' merge logic at https://github.com/pandas-dev/pandas/blob/
a0779adb183345a8eb4be58b3ad00c223da58768/pandas/core/reshape/merge.py#L2300-L2370
"""
- pass
+ to_rename = {
+ 'left': {},
+ 'right': {}
+ }
+
+ left_set = set(left) - {left_df._geometry_column_name}
+ right_set = set(right) - {right_df._geometry_column_name}
+ overlap = left_set.intersection(right_set)
+
+ if not overlap:
+ return to_rename
+
+    left_suffix, right_suffix = suffixes
+    for name in overlap:
+        # Every overlapping name is, by construction, in both frames
+        to_rename['left'][name] = f'{name}{left_suffix}'
+        to_rename['right'][name] = f'{name}{right_suffix}'
+
+    return to_rename
def _restore_index(joined, index_names, index_names_original):
@@ -167,7 +229,11 @@ def _restore_index(joined, index_names, index_names_original):
Set back the the original index columns, and restoring their name as `None`
if they didn't have a name originally.
"""
- pass
+    joined = joined.set_index(index_names)
+    # Index.names is a FrozenList and cannot be mutated item-by-item;
+    # assign the original names (None where the index was unnamed) at once
+    joined.index.names = index_names_original
+    return joined
def _adjust_indexers(indices, distances, original_length, how, predicate):
@@ -176,7 +242,24 @@ def _adjust_indexers(indices, distances, original_length, how, predicate):
For a left or right join, we need to adjust them to include the rows
that would not be present in an inner join.
"""
- pass
+ left_index, right_index = indices
+ if how == 'inner':
+ return left_index, right_index, distances
+
+ if how == 'left':
+ missing = np.setdiff1d(np.arange(original_length), left_index)
+ left_index = np.concatenate([left_index, missing])
+ right_index = np.concatenate([right_index, np.full(len(missing), -1)])
+ if distances is not None:
+ distances = np.concatenate([distances, np.full(len(missing), np.inf)])
+ elif how == 'right':
+ missing = np.setdiff1d(np.arange(original_length), right_index)
+ right_index = np.concatenate([right_index, missing])
+ left_index = np.concatenate([left_index, np.full(len(missing), -1)])
+ if distances is not None:
+ distances = np.concatenate([distances, np.full(len(missing), np.inf)])
+
+ return left_index, right_index, distances
def _frame_join(left_df, right_df, indices, distances, how, lsuffix,
@@ -208,7 +291,42 @@ def _frame_join(left_df, right_df, indices, distances, how, lsuffix,
GeoDataFrame
Joined GeoDataFrame.
"""
- pass
+    left_index, right_index = indices
+
+    # Rename conflicting (non-geometry) columns first
+    rename_dict = _process_column_names_with_suffix(
+        left_df.columns, right_df.columns, (lsuffix, rsuffix), left_df, right_df)
+    left = left_df.rename(columns=rename_dict['left'])
+    right = right_df.rename(columns=rename_dict['right'])
+
+    # Only the geometry of the frame driving the join is kept
+    if how == 'right':
+        left = left.drop(columns=left_df._geometry_column_name)
+    else:
+        right = right.drop(columns=right_df._geometry_column_name)
+
+    # Align matched rows positionally: reset to a RangeIndex, then
+    # reindex with the integer indexers; -1 ("no match") is absent from
+    # the RangeIndex and therefore yields an all-NaN row
+    left = left.reset_index(drop=True).reindex(left_index).reset_index(drop=True)
+    right = right.reset_index(drop=True).reindex(right_index).reset_index(drop=True)
+
+    if distances is not None:
+        right['_distance'] = distances
+
+    joined = pd.concat([left, right], axis=1)
+
+    # Restore the driving frame's original index and geometry column
+    if how == 'right':
+        joined.index = right_df.index[right_index]
+        joined = GeoDataFrame(joined).set_geometry(right_df._geometry_column_name)
+    else:  # 'left' or 'inner'
+        joined.index = left_df.index[left_index]
+        joined = GeoDataFrame(joined).set_geometry(left_df._geometry_column_name)
+
+    return joined
def _filter_shared_attribute(left_df, right_df, l_idx, r_idx, attribute):
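Illustration (sketch) of the alignment trick used in `_frame_join`: after `reset_index(drop=True)` a frame has a RangeIndex, so reindexing with a -1 indexer (meaning "no match") produces an all-NaN row.

    import numpy as np
    import pandas as pd

    right = pd.DataFrame({"attr": ["A", "B"]})
    right_index = np.array([0, 1, -1])
    print(right.reset_index(drop=True).reindex(right_index))
    #    attr
    #  0    A
    #  1    B
    # -1  NaN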
diff --git a/geopandas/tools/tests/test_clip.py b/geopandas/tools/tests/test_clip.py
index 6ccf6e2..11b9f20 100644
--- a/geopandas/tools/tests/test_clip.py
+++ b/geopandas/tools/tests/test_clip.py
@@ -20,51 +20,67 @@ mask_variants_large_rectangle = ['larger_single_rectangle_gdf',
@pytest.fixture
def point_gdf():
"""Create a point GeoDataFrame."""
- pass
+ return GeoDataFrame(
+ {'geometry': [Point(0, 0), Point(1, 1), Point(2, 2)]},
+ crs="EPSG:4326"
+ )
@pytest.fixture
def point_gdf2():
"""Create a point GeoDataFrame."""
- pass
+ return GeoDataFrame(
+ {'geometry': [Point(0, 0), Point(1, 1), Point(2, 2)],
+ 'value': [1, 2, 3]},
+ crs="EPSG:4326"
+ )
@pytest.fixture
def pointsoutside_nooverlap_gdf():
"""Create a point GeoDataFrame. Its points are all outside the single
rectangle, and its bounds are outside the single rectangle's."""
- pass
+ return GeoDataFrame(
+ {'geometry': [Point(-1, -1), Point(-2, -2), Point(-3, -3)]},
+ crs="EPSG:4326"
+ )
@pytest.fixture
def pointsoutside_overlap_gdf():
"""Create a point GeoDataFrame. Its points are all outside the single
rectangle, and its bounds are overlapping the single rectangle's."""
- pass
+ return GeoDataFrame(
+ {'geometry': [Point(-1, -1), Point(3, 3), Point(5, 5)]},
+ crs="EPSG:4326"
+ )
@pytest.fixture
def single_rectangle_gdf():
"""Create a single rectangle for clipping."""
- pass
+ return GeoDataFrame(
+ {'geometry': [box(0, 0, 2, 2)]},
+ crs="EPSG:4326"
+ )
@pytest.fixture
def single_rectangle_gdf_tuple_bounds(single_rectangle_gdf):
"""Bounds of the created single rectangle"""
- pass
+ return tuple(single_rectangle_gdf.total_bounds)
@pytest.fixture
def single_rectangle_gdf_list_bounds(single_rectangle_gdf):
"""Bounds of the created single rectangle"""
- pass
+ return list(single_rectangle_gdf.total_bounds)
@pytest.fixture
def single_rectangle_gdf_array_bounds(single_rectangle_gdf):
"""Bounds of the created single rectangle"""
- pass
+ return single_rectangle_gdf.total_bounds
@pytest.fixture
@@ -74,184 +90,266 @@ def larger_single_rectangle_gdf():
are returned when you clip polygons. This fixture is larger which
eliminates the slivers in the clip return.
"""
- pass
+ return GeoDataFrame(
+ {'geometry': [box(-1, -1, 3, 3)]},
+ crs="EPSG:4326"
+ )
@pytest.fixture
def larger_single_rectangle_gdf_bounds(larger_single_rectangle_gdf):
"""Bounds of the created single rectangle"""
- pass
+ return larger_single_rectangle_gdf.total_bounds
@pytest.fixture
def buffered_locations(point_gdf):
"""Buffer points to create a multi-polygon."""
- pass
+ return GeoDataFrame(geometry=point_gdf.geometry.buffer(1), crs=point_gdf.crs)
@pytest.fixture
def donut_geometry(buffered_locations, single_rectangle_gdf):
"""Make a geometry with a hole in the middle (a donut)."""
- pass
+ return buffered_locations.geometry.unary_union.difference(single_rectangle_gdf.geometry.unary_union)
@pytest.fixture
def two_line_gdf():
"""Create Line Objects For Testing"""
- pass
+ return GeoDataFrame(
+ {'geometry': [LineString([(0, 0), (2, 2)]), LineString([(2, 0), (0, 2)])]},
+ crs="EPSG:4326"
+ )
@pytest.fixture
def multi_poly_gdf(donut_geometry):
"""Create a multi-polygon GeoDataFrame."""
- pass
+ return GeoDataFrame(
+ {'geometry': [donut_geometry]},
+ crs="EPSG:4326"
+ )
@pytest.fixture
def multi_line(two_line_gdf):
"""Create a multi-line GeoDataFrame.
This GDF has one multiline and one regular line."""
- pass
+ multi = two_line_gdf.geometry.unary_union
+ return GeoDataFrame(
+ {'geometry': [multi, two_line_gdf.geometry.iloc[0]]},
+ crs="EPSG:4326"
+ )
@pytest.fixture
def multi_point(point_gdf):
"""Create a multi-point GeoDataFrame."""
- pass
+ multi = MultiPoint(point_gdf.geometry.tolist())
+ return GeoDataFrame(
+ {'geometry': [multi]},
+ crs="EPSG:4326"
+ )
@pytest.fixture
def mixed_gdf():
"""Create a Mixed Polygon and LineString For Testing"""
- pass
+ return GeoDataFrame(
+ {'geometry': [Polygon([(0, 0), (1, 1), (0, 1)]), LineString([(0, 0), (1, 1)])]},
+ crs="EPSG:4326"
+ )
@pytest.fixture
def geomcol_gdf():
"""Create a Mixed Polygon and LineString For Testing"""
- pass
+ return GeoDataFrame(
+ {'geometry': [GeometryCollection([Polygon([(0, 0), (1, 1), (0, 1)]), LineString([(0, 0), (1, 1)])])]},
+ crs="EPSG:4326"
+ )
@pytest.fixture
def sliver_line():
"""Create a line that will create a point when clipped."""
- pass
+    return GeoDataFrame(
+        {'geometry': [LineString([(0, 0), (2, 2)]),
+                      # Touches the clip rectangle at exactly (2, 2),
+                      # so clipping reduces it to a Point
+                      LineString([(2, 2), (3, 3)])]},
+        crs="EPSG:4326"
+    )
def test_not_gdf(single_rectangle_gdf):
"""Non-GeoDataFrame inputs raise attribute errors."""
- pass
+ with pytest.raises(AttributeError):
+ clip(np.array([1, 2, 3]), single_rectangle_gdf)
def test_non_overlapping_geoms():
"""Test that a bounding box returns empty if the extents don't overlap"""
- pass
+ points = GeoDataFrame({'geometry': [Point(0, 0), Point(1, 1)]}, crs="EPSG:4326")
+ clip_box = box(10, 10, 20, 20)
+ clipped = clip(points, clip_box)
+ assert clipped.empty
@pytest.mark.parametrize('mask_fixture_name', mask_variants_single_rectangle)
class TestClipWithSingleRectangleGdf:
- def test_returns_gdf(self, point_gdf, mask):
+    def test_returns_gdf(self, point_gdf, mask_fixture_name, request):
"""Test that function returns a GeoDataFrame (or GDF-like) object."""
- pass
+ mask = request.getfixturevalue(mask_fixture_name)
+ result = clip(point_gdf, mask)
+ assert isinstance(result, GeoDataFrame)
- def test_returns_series(self, point_gdf, mask):
+    def test_returns_series(self, point_gdf, mask_fixture_name, request):
"""Test that function returns a GeoSeries if GeoSeries is passed."""
- pass
+ mask = request.getfixturevalue(mask_fixture_name)
+ result = clip(point_gdf.geometry, mask)
+ assert isinstance(result, GeoSeries)
- def test_clip_points(self, point_gdf, mask):
+    def test_clip_points(self, point_gdf, mask_fixture_name, request):
"""Test clipping a points GDF with a generic polygon geometry."""
- pass
+ mask = request.getfixturevalue(mask_fixture_name)
+ result = clip(point_gdf, mask)
+ assert len(result) == 3
+ assert all(result.geometry.geom_type == 'Point')
- def test_clip_points_geom_col_rename(self, point_gdf, mask):
+    def test_clip_points_geom_col_rename(self, point_gdf, mask_fixture_name, request):
"""Test clipping a points GDF with a generic polygon geometry."""
- pass
+ mask = request.getfixturevalue(mask_fixture_name)
+ gdf_geom_custom = point_gdf.rename(columns={'geometry': 'geom'}).set_geometry('geom')
+ result = clip(gdf_geom_custom, mask)
+ assert result.geometry.name == 'geom'
- def test_clip_poly(self, buffered_locations, mask):
+    def test_clip_poly(self, buffered_locations, mask_fixture_name, request):
"""Test clipping a polygon GDF with a generic polygon geometry."""
- pass
+ mask = request.getfixturevalue(mask_fixture_name)
+ result = clip(buffered_locations, mask)
+ assert all(result.geometry.geom_type == 'Polygon')
- def test_clip_poly_geom_col_rename(self, buffered_locations, mask):
+    def test_clip_poly_geom_col_rename(self, buffered_locations, mask_fixture_name, request):
"""Test clipping a polygon GDF with a generic polygon geometry."""
- pass
+ mask = request.getfixturevalue(mask_fixture_name)
+ gdf_geom_custom = buffered_locations.rename(columns={'geometry': 'geom'}).set_geometry('geom')
+ result = clip(gdf_geom_custom, mask)
+ assert result.geometry.name == 'geom'
- def test_clip_poly_series(self, buffered_locations, mask):
+    def test_clip_poly_series(self, buffered_locations, mask_fixture_name, request):
"""Test clipping a polygon GDF with a generic polygon geometry."""
- pass
+ mask = request.getfixturevalue(mask_fixture_name)
+ result = clip(buffered_locations.geometry, mask)
+ assert isinstance(result, GeoSeries)
+ assert all(result.geom_type == 'Polygon')
- def test_clip_multipoly_keep_geom_type(self, multi_poly_gdf, mask):
+    def test_clip_multipoly_keep_geom_type(self, multi_poly_gdf, mask_fixture_name, request):
"""Test a multi poly object where the return includes a sliver.
Also the bounds of the object should == the bounds of the clip object
if they fully overlap (as they do in these fixtures)."""
- pass
+        mask = request.getfixturevalue(mask_fixture_name)
+        result = clip(multi_poly_gdf, mask, keep_geom_type=True)
+        # keep_geom_type drops any line/point slivers from the clip
+        assert result.geom_type.isin(['Polygon', 'MultiPolygon']).all()
- def test_clip_multiline(self, multi_line, mask):
+    def test_clip_multiline(self, multi_line, mask_fixture_name, request):
"""Test that clipping a multiline feature with a poly returns expected
output."""
- pass
+ mask = request.getfixturevalue(mask_fixture_name)
+ result = clip(multi_line, mask)
+ assert all(result.geometry.geom_type.isin(['MultiLineString', 'LineString']))
- def test_clip_multipoint(self, multi_point, mask):
+    def test_clip_multipoint(self, multi_point, mask_fixture_name, request):
"""Clipping a multipoint feature with a polygon works as expected.
should return a geodataframe with a single multi point feature"""
- pass
+ mask = request.getfixturevalue(mask_fixture_name)
+ result = clip(multi_point, mask)
+ assert all(result.geometry.geom_type == 'MultiPoint')
- def test_clip_lines(self, two_line_gdf, mask):
+    def test_clip_lines(self, two_line_gdf, mask_fixture_name, request):
"""Test what happens when you give the clip_extent a line GDF."""
- pass
+ mask = request.getfixturevalue(mask_fixture_name)
+ result = clip(two_line_gdf, mask)
+ assert all(result.geometry.geom_type == 'LineString')
- def test_mixed_geom(self, mixed_gdf, mask):
+    def test_mixed_geom(self, mixed_gdf, mask_fixture_name, request):
"""Test clipping a mixed GeoDataFrame"""
- pass
+ mask = request.getfixturevalue(mask_fixture_name)
+ result = clip(mixed_gdf, mask)
+ assert set(result.geometry.geom_type) == {'Polygon', 'LineString'}
- def test_mixed_series(self, mixed_gdf, mask):
+    def test_mixed_series(self, mixed_gdf, mask_fixture_name, request):
"""Test clipping a mixed GeoSeries"""
- pass
+ mask = request.getfixturevalue(mask_fixture_name)
+ result = clip(mixed_gdf.geometry, mask)
+ assert isinstance(result, GeoSeries)
+ assert set(result.geom_type) == {'Polygon', 'LineString'}
- def test_clip_with_line_extra_geom(self, sliver_line, mask):
+    def test_clip_with_line_extra_geom(self, sliver_line, mask_fixture_name, request):
"""When the output of a clipped line returns a geom collection,
and keep_geom_type is True, no geometry collections should be returned."""
- pass
+ mask = request.getfixturevalue(mask_fixture_name)
+ result = clip(sliver_line, mask, keep_geom_type=True)
+ assert all(result.geometry.geom_type == 'LineString')
- def test_clip_no_box_overlap(self, pointsoutside_nooverlap_gdf, mask):
+    def test_clip_no_box_overlap(self, pointsoutside_nooverlap_gdf, mask_fixture_name, request):
"""Test clip when intersection is empty and boxes do not overlap."""
- pass
+ mask = request.getfixturevalue(mask_fixture_name)
+ result = clip(pointsoutside_nooverlap_gdf, mask)
+ assert result.empty
- def test_clip_box_overlap(self, pointsoutside_overlap_gdf, mask):
+    def test_clip_box_overlap(self, pointsoutside_overlap_gdf, mask_fixture_name, request):
"""Test clip when intersection is empty and boxes do overlap."""
- pass
+ mask = request.getfixturevalue(mask_fixture_name)
+ result = clip(pointsoutside_overlap_gdf, mask)
+ assert result.empty
- def test_warning_extra_geoms_mixed(self, mixed_gdf, mask):
+    def test_warning_extra_geoms_mixed(self, mixed_gdf, mask_fixture_name, request):
"""Test the correct warnings are raised if keep_geom_type is
called on a mixed GDF"""
- pass
+ mask = request.getfixturevalue(mask_fixture_name)
+ with pytest.warns(UserWarning, match="Geometry types of input geodataframe"):
+ clip(mixed_gdf, mask, keep_geom_type=True)
- def test_warning_geomcoll(self, geomcol_gdf, mask):
+    def test_warning_geomcoll(self, geomcol_gdf, mask_fixture_name, request):
"""Test the correct warnings are raised if keep_geom_type is
called on a GDF with GeometryCollection"""
- pass
+ mask = request.getfixturevalue(mask_fixture_name)
+ with pytest.warns(UserWarning, match="Geometry types of input geodataframe"):
+ clip(geomcol_gdf, mask, keep_geom_type=True)
def test_clip_line_keep_slivers(sliver_line, single_rectangle_gdf):
"""Test the correct output if a point is returned
from a line only geometry type."""
- pass
+ result = clip(sliver_line, single_rectangle_gdf, keep_geom_type=False)
+ assert set(result.geometry.geom_type) == {'LineString', 'Point'}
def test_clip_multipoly_keep_slivers(multi_poly_gdf, single_rectangle_gdf):
"""Test a multi poly object where the return includes a sliver.
Also the bounds of the object should == the bounds of the clip object
if they fully overlap (as they do in these fixtures)."""
- pass
+    result = clip(multi_poly_gdf, single_rectangle_gdf, keep_geom_type=False)
+    # The slivers trace the rectangle boundary, so the bounds coincide
+    assert not result.empty
+    assert np.allclose(result.total_bounds, single_rectangle_gdf.total_bounds)
def test_clip_with_polygon(single_rectangle_gdf):
"""Test clip when using a shapely object"""
- pass
+ poly = Polygon([(0, 0), (1, 1), (1, 0)])
+ gdf = GeoDataFrame({'geometry': [poly]}, crs="EPSG:4326")
+ result = clip(gdf, single_rectangle_gdf.geometry.iloc[0])
+ assert isinstance(result, GeoDataFrame)
+ assert len(result) == 1
def test_clip_with_multipolygon(buffered_locations, single_rectangle_gdf):
"""Test clipping a polygon with a multipolygon."""
- pass
+ multi = MultiPolygon([single_rectangle_gdf.geometry.iloc[0], Polygon([(2, 2), (3, 3), (3, 2)])])
+ result = clip(buffered_locations, multi)
+ assert isinstance(result, GeoDataFrame)
+    assert result.geom_type.isin(['Polygon', 'MultiPolygon']).all()
@pytest.mark.parametrize('mask_fixture_name', mask_variants_large_rectangle)
@@ -259,7 +357,9 @@ def test_clip_single_multipoly_no_extra_geoms(buffered_locations,
mask_fixture_name, request):
"""When clipping a multi-polygon feature, no additional geom types
should be returned."""
- pass
+ mask = request.getfixturevalue(mask_fixture_name)
+ result = clip(buffered_locations, mask)
+ assert set(result.geometry.geom_type) == {'Polygon'}
@pytest.mark.filterwarnings('ignore:All-NaN slice encountered')
@@ -269,9 +369,15 @@ def test_clip_single_multipoly_no_extra_geoms(buffered_locations,
'EPSG:3857'), GeoSeries([], crs='EPSG:3857').to_frame()])
def test_clip_empty_mask(buffered_locations, mask):
"""Test that clipping with empty mask returns an empty result."""
- pass
+ result = clip(buffered_locations, mask)
+ assert result.empty
def test_clip_sorting(point_gdf2):
"""Test the sorting kwarg in clip"""
- pass
+    mask = Polygon([(0.5, 0.5), (2.5, 0.5), (2.5, 2.5), (0.5, 2.5)])
+
+    result_sorted = clip(point_gdf2, mask, sort=True)
+    assert_index_equal(result_sorted.index, point_gdf2.index[1:3])
+
+    # Without sort=True the same rows are returned, order unspecified
+    result_unsorted = clip(point_gdf2, mask, sort=False)
+    assert set(result_unsorted.index) == set(point_gdf2.index[1:3])
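Note on the pattern used above (sketch): the class-level parametrize supplies the fixture *name* as a test argument, and `request.getfixturevalue` resolves it inside each test, which is why every method must accept `mask_fixture_name` and `request`.

    import pytest

    @pytest.mark.parametrize("mask_fixture_name", ["point_gdf", "point_gdf2"])
    class TestExample:
        def test_len(self, mask_fixture_name, request):
            gdf = request.getfixturevalue(mask_fixture_name)
            assert len(gdf) == 3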
diff --git a/geopandas/tools/tests/test_sjoin.py b/geopandas/tools/tests/test_sjoin.py
index a4a880c..405d6d8 100644
--- a/geopandas/tools/tests/test_sjoin.py
+++ b/geopandas/tools/tests/test_sjoin.py
@@ -13,11 +13,50 @@ from pandas.testing import assert_frame_equal, assert_index_equal, assert_series
class TestSpatialJoin:
- pass
+    def test_spatial_join(self):
+        # Points chosen strictly inside each polygon so every row has
+        # exactly one match
+        left_gdf = GeoDataFrame({
+            'geometry': [Point(0.5, 0.5), Point(1.5, 1.5)],
+            'value': [1, 2]
+        })
+        right_gdf = GeoDataFrame({
+            'geometry': [Polygon([(0, 0), (0, 1), (1, 1), (1, 0)]),
+                         Polygon([(1, 1), (1, 2), (2, 2), (2, 1)])],
+            'attr': ['A', 'B']
+        })
+
+        # Perform spatial join
+        result = sjoin(left_gdf, right_gdf, how='left', predicate='intersects')
+
+        # Check the matches without depending on exact column ordering
+        assert list(result['index_right']) == [0, 1]
+        assert list(result['attr']) == ['A', 'B']
+        assert list(result['value']) == [1, 2]
class TestIndexNames:
- pass
+ def test_index_names(self):
+ # Create sample GeoDataFrames with named indexes
+ left_gdf = GeoDataFrame({
+ 'geometry': [Point(0, 0), Point(1, 1)],
+ 'value': [1, 2]
+ }, index=pd.Index(['a', 'b'], name='left_idx'))
+ right_gdf = GeoDataFrame({
+ 'geometry': [Polygon([(0, 0), (0, 1), (1, 1), (1, 0)])],
+ 'attr': ['A']
+ }, index=pd.Index(['x'], name='right_idx'))
+
+ # Perform spatial join
+ result = sjoin(left_gdf, right_gdf, how='left', predicate='intersects')
+
+ # Assert the result
+ assert result.index.name == 'left_idx'
+ assert 'index_right' in result.columns
+        expected = pd.Series(['x', 'x'], name='index_right',
+                             index=pd.Index(['a', 'b'], name='left_idx'))
+        assert_series_equal(result['index_right'], expected)
@pytest.mark.usefixtures('_setup_class_nybb_filename')
@@ -26,8 +65,41 @@ class TestSpatialJoinNYBB:
@pytest.mark.parametrize('predicate', ['intersects', 'within', 'contains'])
def test_sjoin_no_valid_geoms(self, predicate):
"""Tests a completely empty GeoDataFrame."""
- pass
+        nybb = read_file(self.nybb_filename)
+        # Match the CRS so the join does not trip a CRS-mismatch check
+        empty_df = GeoDataFrame(geometry=[], crs=nybb.crs)
+
+ # Test empty left GeoDataFrame
+ result_left = sjoin(empty_df, nybb, how='left', predicate=predicate)
+ assert len(result_left) == 0
+ assert set(result_left.columns) == set(empty_df.columns).union(nybb.columns).union(['index_right'])
+
+ # Test empty right GeoDataFrame
+ result_right = sjoin(nybb, empty_df, how='left', predicate=predicate)
+ assert len(result_right) == len(nybb)
+ assert set(result_right.columns) == set(nybb.columns).union(empty_df.columns).union(['index_right'])
+ assert result_right['index_right'].isna().all()
class TestNearest:
- pass
+ def test_nearest_join(self):
+ # Create sample GeoDataFrames
+ left_gdf = GeoDataFrame({
+ 'geometry': [Point(0, 0), Point(1, 1), Point(2, 2)],
+ 'value': [1, 2, 3]
+ })
+ right_gdf = GeoDataFrame({
+ 'geometry': [Point(0.1, 0.1), Point(1.1, 1.1), Point(2.1, 2.1)],
+ 'attr': ['A', 'B', 'C']
+ })
+
+ # Perform nearest join
+ result = sjoin_nearest(left_gdf, right_gdf, how='left')
+
+ # Assert the result
+ expected = GeoDataFrame({
+ 'geometry': [Point(0, 0), Point(1, 1), Point(2, 2)],
+ 'value': [1, 2, 3],
+ 'index_right': [0, 1, 2],
+ 'attr': ['A', 'B', 'C']
+ })
+ assert_geodataframe_equal(result, expected)
diff --git a/geopandas/tools/tests/test_tools.py b/geopandas/tools/tests/test_tools.py
index a51978e..1dee985 100644
--- a/geopandas/tools/tests/test_tools.py
+++ b/geopandas/tools/tests/test_tools.py
@@ -1,8 +1,46 @@
-from shapely.geometry import LineString, MultiPoint, Point
+from shapely.geometry import LineString, MultiLineString, MultiPoint, Point, Polygon
from geopandas import GeoSeries
from geopandas.tools import collect
import pytest
class TestTools:
- pass
+ def test_collect_points(self):
+ points = GeoSeries([Point(0, 0), Point(1, 1), Point(2, 2)])
+ result = collect(points)
+ assert isinstance(result, MultiPoint)
+ assert len(result.geoms) == 3
+
+ def test_collect_lines(self):
+ lines = GeoSeries([LineString([(0, 0), (1, 1)]), LineString([(1, 1), (2, 2)])])
+ result = collect(lines)
+        # collect combines, it does not merge: two lines -> MultiLineString
+        assert isinstance(result, MultiLineString)
+        assert len(result.geoms) == 2
+
+ def test_collect_mixed(self):
+ mixed = GeoSeries([Point(0, 0), LineString([(1, 1), (2, 2)]), Polygon([(0, 0), (1, 1), (1, 0)])])
+        # Heterogeneous types cannot be combined into one Multi* geometry
+        with pytest.raises(ValueError):
+            collect(mixed)
+
+ def test_collect_single(self):
+ single = GeoSeries([Point(0, 0)])
+ result = collect(single)
+ assert isinstance(result, Point)
+
+ def test_collect_empty(self):
+ empty = GeoSeries([])
+ result = collect(empty)
+ assert result is None
+
+ def test_collect_multi(self):
+ points = GeoSeries([Point(0, 0), Point(1, 1), Point(2, 2)])
+ result = collect(points, multi=True)
+ assert isinstance(result, MultiPoint)
+ assert len(result.geoms) == 3
+
+ def test_collect_lines_multi(self):
+ lines = GeoSeries([LineString([(0, 0), (1, 1)]), LineString([(1, 1), (2, 2)])])
+ result = collect(lines, multi=True)
+        assert isinstance(result, MultiLineString)
+        assert len(result.geoms) == 2
diff --git a/geopandas/tools/util.py b/geopandas/tools/util.py
index bea1b2d..b588293 100644
--- a/geopandas/tools/util.py
+++ b/geopandas/tools/util.py
@@ -17,5 +17,32 @@ def collect(x, multi=False):
if True, force returned geometries to be Multi* even if they
only have one component.
+ Returns
+ -------
+ geometry : Shapely geometry
+ A single Shapely geometry object
"""
- pass
+    if isinstance(x, BaseGeometry):
+        x = [x]
+    elif isinstance(x, pd.Series):
+        x = list(x)
+
+    if len(x) == 0:
+        # Assumption in the tests above: collecting nothing yields None
+        return None
+
+    # Geometry types must be homogeneous; mixed types cannot be
+    # combined into a single Multi* geometry
+    geom_type = x[0].geom_type
+    if not all(geom.geom_type == geom_type for geom in x):
+        raise ValueError('Geometry type must be homogeneous')
+
+    if len(x) == 1 and (geom_type.startswith('Multi') or not multi):
+        # A single geometry can simply be returned as-is
+        return x[0]
+
+    if geom_type.startswith('Multi'):
+        raise ValueError(
+            'Cannot collect Multi* geometries into a larger Multi* geometry')
+
+    return _multi_type_map[geom_type](list(x))
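Example (sketch): `collect` merges homogeneous geometries and unwraps singletons unless `multi=True`.

    from shapely.geometry import Point
    from geopandas import GeoSeries
    from geopandas.tools import collect

    print(collect(GeoSeries([Point(0, 0), Point(1, 1)])))  # MULTIPOINT (0 0, 1 1)
    print(collect(Point(0, 0)))                            # POINT (0 0)
    print(collect(Point(0, 0), multi=True))                # MULTIPOINT (0 0)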