From f2c8048a16c2cb3ee8299f4d32b4e466169d8bc1 Mon Sep 17 00:00:00 2001 From: Luuk van der Meer Date: Tue, 17 Sep 2024 17:14:10 +0200 Subject: [PATCH] refactor: Use a new workflow for precision handling in point equality tests part II. Refs #130 :construction: --- R/blend.R | 2 +- R/create.R | 46 ++------ R/join.R | 10 +- R/morphers.R | 177 ++----------------------------- R/subdivide.R | 5 +- R/utils.R | 41 +++++-- man/create_from_spatial_lines.Rd | 6 ++ man/spatial_morphers.Rd | 10 +- man/st_network_join.Rd | 9 +- 9 files changed, 80 insertions(+), 226 deletions(-) diff --git a/R/blend.R b/R/blend.R index 4344bf57..60880eae 100644 --- a/R/blend.R +++ b/R/blend.R @@ -241,7 +241,7 @@ blend_ = function(x, y, tolerance) { # Remove duplicated features in y. # These features will have the same blending location. # Only one point can be blended per location. - is_duplicated = st_duplicated(Y) + is_duplicated = st_duplicated_points(Y) Y = Y[!is_duplicated] ## ========================================== # STEP V: INCLUDE FEATURES IN EDGE GEOMETRIES diff --git a/R/create.R b/R/create.R index 6b390fab..b7afba7c 100644 --- a/R/create.R +++ b/R/create.R @@ -414,6 +414,11 @@ as_sfnetwork.focused_tbl_graph = function(x, ...) { #' in the network. Nodes are created at the line boundaries. Shared boundaries #' between multiple linestrings become the same node. #' +#' @note By default sfnetworks rounds coordinates to 12 decimal places to +#' determine spatial equality. You can influence this behavior by explicitly +#' setting the precision of the linestrings using +#' \code{\link[sf]{st_set_precision}}. +#' #' @return An object of class \code{\link{sfnetwork}}. #' #' @examples @@ -429,51 +434,12 @@ as_sfnetwork.focused_tbl_graph = function(x, ...) { #' #' par(oldpar) #' -#' @importFrom sf st_as_sf st_sf +#' @importFrom sf st_as_sf st_precision st_sf #' @export create_from_spatial_lines = function(x, directed = TRUE, compute_length = FALSE) { # The provided lines will form the edges of the network. edges = st_as_sf(x) - # Get the boundary points of the edges. - nodes = linestring_boundary_points(edges) - # Give each unique location a unique ID. - indices = st_match(nodes) - # Define for each endpoint if it is a source or target node. - is_source = rep(c(TRUE, FALSE), length(nodes) / 2) - # Define for each edge which node is its source and target node. - if ("from" %in% colnames(edges)) raise_overwrite("from") - edges$from = indices[is_source] - if ("to" %in% colnames(edges)) raise_overwrite("to") - edges$to = indices[!is_source] - # Remove duplicated nodes from the nodes table. - nodes = nodes[!duplicated(indices)] - # Convert to sf object - nodes = st_sf(geometry = nodes) - # Use the same sf column name in the nodes as in the edges. - geom_colname = attr(edges, "sf_column") - if (geom_colname != "geometry") { - names(nodes)[1] = geom_colname - attr(nodes, "sf_column") = geom_colname - } - # Use the same class for the nodes as for the edges. - # This mainly affects the "lower level" classes. - # For example an sf tibble instead of a sf data frame. - class(nodes) = class(edges) - # Create a network out of the created nodes and the provided edges. - # Force to skip network validity tests because we already know they pass. - sfnetwork(nodes, edges, - directed = directed, - edges_as_lines = TRUE, - compute_length = compute_length, - force = TRUE - ) -} - -create_from_spatial_lines_v2 = function(x, directed = TRUE, - compute_length = FALSE) { - # The provided lines will form the edges of the network. - edges = st_as_sf(x) # Get the coordinates of the boundary points of the edges. # These will form the nodes of the network. node_coords = linestring_boundary_points(edges, return_df = TRUE) diff --git a/R/join.R b/R/join.R index db63f144..0dd96df4 100644 --- a/R/join.R +++ b/R/join.R @@ -1,8 +1,7 @@ #' Join two spatial networks based on equality of node geometries #' #' A spatial network specific join function which makes a spatial full join on -#' the geometries of the nodes data, based on the \code{\link[sf]{st_equals}} -#' spatial predicate. Edge data are combined using a +#' the geometries of the nodes data. Edge data are combined using a #' \code{\link[dplyr]{bind_rows}} semantic, meaning that data are matched by #' column name and values are filled with \code{NA} if missing in either of #' the networks. The \code{from} and \code{to} columns in the edge data are @@ -15,6 +14,11 @@ #' #' @param ... Arguments passed on to \code{\link[tidygraph]{graph_join}}. #' +#' @note By default sfnetworks rounds coordinates to 12 decimal places to +#' determine spatial equality. You can influence this behavior by explicitly +#' setting the precision of the networks using +#' \code{\link[sf]{st_set_precision}}. +#' #' @return The joined networks as an object of class \code{\link{sfnetwork}}. #' #' @examples @@ -86,7 +90,7 @@ spatial_join_network = function(x, y, ...) { N_x = vertex_attr(x, x_geomcol) N_y = vertex_attr(y, y_geomcol) N = c(N_x, N_y) - uid = st_match(N) + uid = st_match_points(N) # Store the unique node indices as node attributes in both x and y. if (".sfnetwork_index" %in% c(vertex_attr_names(x), vertex_attr_names(y))) { raise_reserved_attr(".sfnetwork_index") diff --git a/R/morphers.R b/R/morphers.R index dc856ec1..c5b1282b 100644 --- a/R/morphers.R +++ b/R/morphers.R @@ -838,7 +838,10 @@ to_spatial_smooth = function(x, #' be merged into a single node, and should subdivision points at the same #' as an existing node be merged into that node? Defaults to \code{TRUE}. If #' set to \code{FALSE}, each subdivision point is added separately as a new -#' node to the network. +#' node to the network. By default sfnetworks rounds coordinates to 12 decimal +#' places to determine spatial equality. You can influence this behavior by +#' explicitly setting the precision of the network using +#' \code{\link[sf]{st_set_precision}}. #' #' @export to_spatial_subdivision = function(x, merge_equal = TRUE) { @@ -890,7 +893,10 @@ to_spatial_transformed = function(x, ...) { #' @describeIn spatial_morphers Merge nodes with equal geometries into a single #' node. Returns a \code{morphed_sfnetwork} containing a single element of -#' class \code{\link{sfnetwork}}. +#' class \code{\link{sfnetwork}}. By default sfnetworks rounds coordinates to +#' 12 decimal places to determine spatial equality. You can influence this +#' behavior by explicitly setting the precision of the network using +#' \code{\link[sf]{st_set_precision}}. #' #' @importFrom igraph contract delete_vertex_attr #' @importFrom sf st_as_sf st_geometry @@ -941,170 +947,3 @@ to_spatial_unique = function(x, summarise_attributes = "ignore", unique = tbg_to_sfn(x_new %preserve_network_attrs% x) ) } - - -to_spatial_subdivision_orig = function(x) { - if (will_assume_constant(x)) raise_assume_constant("to_spatial_subdivision") - # Retrieve nodes and edges from the network. - nodes = nodes_as_sf(x) - edges = edges_as_sf(x) - # For later use: - # --> Check wheter x is directed. - directed = is_directed(x) - ## =========================== - # STEP I: DECOMPOSE THE EDGES - # Decompose the edges linestring geometries into the points that shape them. - ## =========================== - # Extract all points from the linestring geometries of the edges. - edge_pts = sf_to_df(edges) - # Extract two subsets of information: - # --> One with only the coordinates of the points - # --> Another with indices describing to which edge a point belonged. - edge_coords = edge_pts[names(edge_pts) %in% c("x", "y", "z", "m")] - edge_idxs = edge_pts$linestring_id - ## ======================================= - # STEP II: DEFINE WHERE TO SUBDIVIDE EDGES - # Edges should be split at locations where: - # --> An edge interior point is equal to a boundary point in another edge. - # --> An edge interior point is equal to an interior point in another edge. - # Hence, we need to split edges at point that: - # --> Are interior points. - # --> Have at least one duplicate among the other edge points. - ## ======================================= - # Find which of the edge points is a boundary point. - is_startpoint = !duplicated(edge_idxs) - is_endpoint = !duplicated(edge_idxs, fromLast = TRUE) - is_boundary = is_startpoint | is_endpoint - # Find which of the edge points occur more than once. - is_duplicate_desc = duplicated(edge_coords) - is_duplicate_asc = duplicated(edge_coords, fromLast = TRUE) - has_duplicate = is_duplicate_desc | is_duplicate_asc - # Split points are those edge points satisfying both of the following rules: - # --> 1) They have at least one duplicate among the other edge points. - # --> 2) They are not edge boundary points themselves. - is_split = has_duplicate & !is_boundary - if (! any(is_split)) return (x) - ## ================================ - # STEP III: DUPLICATE SPLIT POINTS - # The split points are currently a single interior point in an edge. - # They will become the endpoint of one edge *and* the startpoint of another. - # Hence, each split point needs to be duplicated. - ## ================================ - # Create the repetition vector: - # --> This defines for each edge point if it should be duplicated. - # --> A value of '1' means 'store once', i.e. don't duplicate. - # --> A value of '2' means 'store twice', i.e. duplicate. - # --> Split points will be part of two new edges and should be duplicated. - reps = rep(1L, nrow(edge_coords)) - reps[is_split] = 2L - # Create the new coordinate data frame by duplicating split points. - new_edge_coords = data.frame(lapply(edge_coords, function(i) rep(i, reps))) - ## ========================================== - # STEP IV: CONSTRUCT THE NEW EDGES GEOMETRIES - # With the new coords of the edge points we need to recreate linestrings. - # First we need to know which edge points belong to which *new* edge. - # Then we need to build a linestring geometry for each new edge. - ## ========================================== - # First assign each new edge point coordinate its *original* edge index. - # --> Then increment those accordingly at each split point. - orig_edge_idxs = rep(edge_idxs, reps) - # Original edges are subdivided at each split point. - # Therefore, a new edge originates from each split point. - # Hence, to get the new edge indices: - # --> Increment each original edge index by 1 at each split point. - incs = integer(nrow(new_edge_coords)) # By default don't increment. - incs[which(is_split) + 1:sum(is_split)] = 1L # Add 1 after each split. - new_edge_idxs = orig_edge_idxs + cumsum(incs) - new_edge_coords$edge_id = new_edge_idxs - # Build the new edge geometries. - new_edge_geoms = sfc_linestring(new_edge_coords, linestring_id = "edge_id") - st_crs(new_edge_geoms) = st_crs(edges) - st_precision(new_edge_geoms) = st_precision(edges) - new_edge_coords$edge_id = NULL - ## =================================== - # STEP V: CONSTRUCT THE NEW EDGE DATA - # We now have the geometries of the new edges. - # However, the original edge attributes got lost. - # We will restore them by: - # --> Adding back the attributes to edges that were not split. - # --> Duplicating original attributes within splitted edges. - # Beware that from and to columns will remain unchanged at this stage. - # We will update them later. - ## =================================== - # Find which *original* edge belongs to which *new* edge: - # --> Use the lists of edge indices mapped to the new edge points. - # --> There we already mapped each new edge point to its original edge. - # --> First define which new edge points are startpoints of new edges. - # --> Then retrieve the original edge index from these new startpoints. - # --> This gives us a single original edge index for each new edge. - is_new_startpoint = !duplicated(new_edge_idxs) - orig_edge_idxs = orig_edge_idxs[is_new_startpoint] - # Duplicate original edge data whenever needed. - new_edges = edges[orig_edge_idxs, ] - # Set the new edge geometries as geometries of these new edges. - st_geometry(new_edges) = new_edge_geoms - ## ========================================== - # STEP VI: CONSTRUCT THE NEW NODE GEOMETRIES - # All split points are now boundary points of new edges. - # All edge boundaries become nodes in the network. - ## ========================================== - is_new_boundary = rep(is_split | is_boundary, reps) - new_node_geoms = sfc_point(new_edge_coords[is_new_boundary, ]) - st_crs(new_node_geoms) = st_crs(nodes) - st_precision(new_node_geoms) = st_precision(nodes) - ## ===================================== - # STEP VII: CONSTRUCT THE NEW NODE DATA - # We now have the geometries of the new nodes. - # However, the original node attributes got lost. - # We will restore them by: - # --> Adding back the attributes to nodes that were already a node before. - # --> Filling attribute values of newly added nodes with NA. - # Beware at this stage the nodes are recreated from scratch. - # That means each boundary point of the new edges is stored as separate node. - # Boundaries with equal geometries will be merged into a single node later. - ## ===================================== - # Find which of the *original* edge points equaled which *original* node. - # If an edge point did not equal a node, store NA instead. - node_idxs = rep(NA, nrow(edge_pts)) - if (directed) { - node_idxs[is_boundary] = edge_incident_ids(x) - } else { - node_idxs[is_boundary] = edge_boundary_ids(x) - } - # Find which of the *original* nodes belong to which *new* edge boundary. - # If a new edge boundary does not equal an original node, store NA instead. - orig_node_idxs = rep(node_idxs, reps)[is_new_boundary] - # Retrieve original node data for each new edge boundary. - # Rows of newly added nodes will be NA. - new_nodes = nodes[orig_node_idxs, ] - # Set the new node geometries as geometries of these new nodes. - st_geometry(new_nodes) = new_node_geoms - ## ================================================== - # STEP VIII: UPDATE FROM AND TO INDICES OF NEW EDGES - # Now we updated the node data, the node indices changes. - # Therefore we need to update the from and to columns of the edges as well. - ## ================================================== - # Define the indices of the new nodes. - # Equal geometries should get the same index. - new_node_idxs = st_match(new_node_geoms) - # Map node indices to edges. - is_source = rep(c(TRUE, FALSE), length(new_node_geoms) / 2) - new_edges$from = new_node_idxs[is_source] - new_edges$to = new_node_idxs[!is_source] - ## ============================= - # STEP IX: UPDATE THE NEW NODES - # We can now remove the duplicated node geometries from the new nodes data. - # Then, each location is represented by a single node. - ## ============================= - new_nodes = new_nodes[!duplicated(new_node_idxs), ] - ## ============================ - # STEP X: RECREATE THE NETWORK - # Use the new nodes data and the new edges data to create the new network. - ## ============================ - # Create new network. - x_new = sfnetwork_(new_nodes, new_edges, directed = directed) - # Return in a list. - list( - subdivision = x_new %preserve_network_attrs% x - ) -} diff --git a/R/subdivide.R b/R/subdivide.R index f0714e26..d5a02160 100644 --- a/R/subdivide.R +++ b/R/subdivide.R @@ -13,7 +13,10 @@ #' be merged into a single node, and should subdivision points at the same #' as an existing node be merged into that node? Defaults to \code{TRUE}. If #' set to \code{FALSE}, each subdivision point is added separately as a new -#' node to the network. +#' node to the network. By default sfnetworks rounds coordinates to 12 decimal +#' places to determine spatial equality. You can influence this behavior by +#' explicitly setting the precision of the network using +#' \code{\link[sf]{st_set_precision}}. #' #' @returns A subdivision of x as object of class \code{\link{sfnetwork}}. #' diff --git a/R/utils.R b/R/utils.R index 259ba43b..1181fb0d 100644 --- a/R/utils.R +++ b/R/utils.R @@ -24,6 +24,20 @@ st_duplicated = function(x) { dup } +#' @importFrom sf st_geometry +#' @importFrom sfheaders sfc_to_df +st_duplicated_points = function(x, precision = attr(x, "precision")) { + x_df = sfc_to_df(st_geometry(x)) + coords = x_df[, names(x_df) %in% c("x", "y", "z", "m")] + st_duplicated_points_df(coords, precision = precision) +} + +st_duplicated_points_df = function(x, precision = NULL) { + x_trim = lapply(x, round, digits = precision_digits(precision)) + x_concat = do.call(paste, x_trim) + duplicated(x_concat) +} + #' Geometry matching #' #' @param x An object of class \code{\link[sf]{sf}} or \code{\link[sf]{sfc}}. @@ -58,18 +72,11 @@ st_match_points = function(x, precision = attr(x, "precision")) { } st_match_points_df = function(x, precision = NULL) { - x_trim = lapply(x, round, digits = precision_to_digits(precision)) + x_trim = lapply(x, round, digits = precision_digits(precision)) x_concat = do.call(paste, x_trim) match(x_concat, unique(x_concat)) } -#' @importFrom cli cli_abort -precision_to_digits = function(x) { - if (is.null(x) || x == 0) return (12) - if (x > 0) return (log(x, 10)) - cli_abort("Currently sfnetworks does not support negative precision") -} - #' Rounding of coordinates of point and linestring geometries #' #' @param x An object of class \code{\link[sf]{sf}} or \code{\link[sf]{sfc}}. @@ -425,6 +432,24 @@ merge_mranges = function(a, b) { ab } +#' Infer the number of decimal places from a fixed precision scale factor +#' +#' @param x A fixed precision scale factor. +#' +#' @details For more information on fixed precision scale factors see +#' \code{\link[sf]{st_as_binary}}. When the precision scale factor is 0 +#' or not defined, sfnetworks defaults to 12 decimal places. +#' +#' @return A numeric value specifying the number of decimal places. +#' +#' @importFrom cli cli_abort +#' @noRd +precision_digits = function(x) { + if (is.null(x) || x == 0) return (12) + if (x > 0) return (log(x, 10)) + cli_abort("Currently sfnetworks does not support negative precision") +} + #' List-column friendly version of bind_rows #' #' @param ... Tables to be row-binded. diff --git a/man/create_from_spatial_lines.Rd b/man/create_from_spatial_lines.Rd index 022bcf25..6741990f 100644 --- a/man/create_from_spatial_lines.Rd +++ b/man/create_from_spatial_lines.Rd @@ -32,6 +32,12 @@ It is assumed that the given linestring geometries form the edges in the network. Nodes are created at the line boundaries. Shared boundaries between multiple linestrings become the same node. } +\note{ +By default sfnetworks rounds coordinates to 12 decimal places to +determine spatial equality. You can influence this behavior by explicitly +setting the precision of the linestrings using +\code{\link[sf]{st_set_precision}}. +} \examples{ library(sf, quietly = TRUE) diff --git a/man/spatial_morphers.Rd b/man/spatial_morphers.Rd index 11603019..f87e871d 100644 --- a/man/spatial_morphers.Rd +++ b/man/spatial_morphers.Rd @@ -127,7 +127,10 @@ using the \code{==} operator.} be merged into a single node, and should subdivision points at the same as an existing node be merged into that node? Defaults to \code{TRUE}. If set to \code{FALSE}, each subdivision point is added separately as a new -node to the network.} +node to the network. By default sfnetworks rounds coordinates to 12 decimal +places to determine spatial equality. You can influence this behavior by +explicitly setting the precision of the network using +\code{\link[sf]{st_set_precision}}.} \item{subset_by}{Whether to create subgraphs based on nodes or edges.} } @@ -242,7 +245,10 @@ Returns a \code{morphed_sfnetwork} containing a single element of class \item \code{to_spatial_unique()}: Merge nodes with equal geometries into a single node. Returns a \code{morphed_sfnetwork} containing a single element of -class \code{\link{sfnetwork}}. +class \code{\link{sfnetwork}}. By default sfnetworks rounds coordinates to +12 decimal places to determine spatial equality. You can influence this +behavior by explicitly setting the precision of the network using +\code{\link[sf]{st_set_precision}}. }} \examples{ diff --git a/man/st_network_join.Rd b/man/st_network_join.Rd index 3f711648..1512195d 100644 --- a/man/st_network_join.Rd +++ b/man/st_network_join.Rd @@ -19,13 +19,18 @@ The joined networks as an object of class \code{\link{sfnetwork}}. } \description{ A spatial network specific join function which makes a spatial full join on -the geometries of the nodes data, based on the \code{\link[sf]{st_equals}} -spatial predicate. Edge data are combined using a +the geometries of the nodes data. Edge data are combined using a \code{\link[dplyr]{bind_rows}} semantic, meaning that data are matched by column name and values are filled with \code{NA} if missing in either of the networks. The \code{from} and \code{to} columns in the edge data are updated such that they match the new node indices of the resulting network. } +\note{ +By default sfnetworks rounds coordinates to 12 decimal places to +determine spatial equality. You can influence this behavior by explicitly +setting the precision of the networks using +\code{\link[sf]{st_set_precision}}. +} \examples{ library(sf, quietly = TRUE)