From a7c6de4fe5a380ee7df9dde113f37bc96d732c91 Mon Sep 17 00:00:00 2001 From: Luuk van der Meer Date: Mon, 16 Sep 2024 19:29:57 +0200 Subject: [PATCH] refactor: Use a new workflow for precision handling in point equality tests. Refs #130 :construction: --- R/create.R | 42 +++++++++++++ R/morphers.R | 169 +++++++++++++++++++++++++++++++++++++++++++++++++- R/subdivide.R | 2 +- R/utils.R | 63 +++++++++++++++---- 4 files changed, 263 insertions(+), 13 deletions(-) diff --git a/R/create.R b/R/create.R index ad96236e..6b390fab 100644 --- a/R/create.R +++ b/R/create.R @@ -470,6 +470,48 @@ create_from_spatial_lines = function(x, directed = TRUE, ) } +create_from_spatial_lines_v2 = function(x, directed = TRUE, + compute_length = FALSE) { + # The provided lines will form the edges of the network. + edges = st_as_sf(x) + # Get the coordinates of the boundary points of the edges. + # These will form the nodes of the network. + node_coords = linestring_boundary_points(edges, return_df = TRUE) + # Give each unique location a unique ID. + indices = st_match_points_df(node_coords, st_precision(x)) + # Convert the node coordinates into point geometry objects. + nodes = df_to_points(node_coords, x, select = FALSE) + # Define for each endpoint if it is a source or target node. + is_source = rep(c(TRUE, FALSE), length(nodes) / 2) + # Define for each edge which node is its source and target node. + if ("from" %in% colnames(edges)) raise_overwrite("from") + edges$from = indices[is_source] + if ("to" %in% colnames(edges)) raise_overwrite("to") + edges$to = indices[!is_source] + # Remove duplicated nodes from the nodes table. + nodes = nodes[!duplicated(indices)] + # Convert to sf object + nodes = st_sf(geometry = nodes) + # Use the same sf column name in the nodes as in the edges. + geom_colname = attr(edges, "sf_column") + if (geom_colname != "geometry") { + names(nodes)[1] = geom_colname + attr(nodes, "sf_column") = geom_colname + } + # Use the same class for the nodes as for the edges. + # This mainly affects the "lower level" classes. + # For example an sf tibble instead of a sf data frame. + class(nodes) = class(edges) + # Create a network out of the created nodes and the provided edges. + # Force to skip network validity tests because we already know they pass. + sfnetwork(nodes, edges, + directed = directed, + edges_as_lines = TRUE, + compute_length = compute_length, + force = TRUE + ) +} + #' Create a spatial network from point geometries #' #' @param x An object of class \code{\link[sf]{sf}} or \code{\link[sf]{sfc}} diff --git a/R/morphers.R b/R/morphers.R index 22412873..dc856ec1 100644 --- a/R/morphers.R +++ b/R/morphers.R @@ -905,7 +905,7 @@ to_spatial_unique = function(x, summarise_attributes = "ignore", node_geoms = st_geometry(nodes) node_geomcol = attr(nodes, "sf_column") # Define which nodes have equal geometries. - matches = st_match(node_geoms) + matches = st_match_points(node_geoms) # Update the attribute summary instructions. # During morphing tidygraph adds the tidygraph node index column. # Since it is added internally it is not referenced in summarise_attributes. @@ -941,3 +941,170 @@ to_spatial_unique = function(x, summarise_attributes = "ignore", unique = tbg_to_sfn(x_new %preserve_network_attrs% x) ) } + + +to_spatial_subdivision_orig = function(x) { + if (will_assume_constant(x)) raise_assume_constant("to_spatial_subdivision") + # Retrieve nodes and edges from the network. + nodes = nodes_as_sf(x) + edges = edges_as_sf(x) + # For later use: + # --> Check wheter x is directed. + directed = is_directed(x) + ## =========================== + # STEP I: DECOMPOSE THE EDGES + # Decompose the edges linestring geometries into the points that shape them. + ## =========================== + # Extract all points from the linestring geometries of the edges. + edge_pts = sf_to_df(edges) + # Extract two subsets of information: + # --> One with only the coordinates of the points + # --> Another with indices describing to which edge a point belonged. + edge_coords = edge_pts[names(edge_pts) %in% c("x", "y", "z", "m")] + edge_idxs = edge_pts$linestring_id + ## ======================================= + # STEP II: DEFINE WHERE TO SUBDIVIDE EDGES + # Edges should be split at locations where: + # --> An edge interior point is equal to a boundary point in another edge. + # --> An edge interior point is equal to an interior point in another edge. + # Hence, we need to split edges at point that: + # --> Are interior points. + # --> Have at least one duplicate among the other edge points. + ## ======================================= + # Find which of the edge points is a boundary point. + is_startpoint = !duplicated(edge_idxs) + is_endpoint = !duplicated(edge_idxs, fromLast = TRUE) + is_boundary = is_startpoint | is_endpoint + # Find which of the edge points occur more than once. + is_duplicate_desc = duplicated(edge_coords) + is_duplicate_asc = duplicated(edge_coords, fromLast = TRUE) + has_duplicate = is_duplicate_desc | is_duplicate_asc + # Split points are those edge points satisfying both of the following rules: + # --> 1) They have at least one duplicate among the other edge points. + # --> 2) They are not edge boundary points themselves. + is_split = has_duplicate & !is_boundary + if (! any(is_split)) return (x) + ## ================================ + # STEP III: DUPLICATE SPLIT POINTS + # The split points are currently a single interior point in an edge. + # They will become the endpoint of one edge *and* the startpoint of another. + # Hence, each split point needs to be duplicated. + ## ================================ + # Create the repetition vector: + # --> This defines for each edge point if it should be duplicated. + # --> A value of '1' means 'store once', i.e. don't duplicate. + # --> A value of '2' means 'store twice', i.e. duplicate. + # --> Split points will be part of two new edges and should be duplicated. + reps = rep(1L, nrow(edge_coords)) + reps[is_split] = 2L + # Create the new coordinate data frame by duplicating split points. + new_edge_coords = data.frame(lapply(edge_coords, function(i) rep(i, reps))) + ## ========================================== + # STEP IV: CONSTRUCT THE NEW EDGES GEOMETRIES + # With the new coords of the edge points we need to recreate linestrings. + # First we need to know which edge points belong to which *new* edge. + # Then we need to build a linestring geometry for each new edge. + ## ========================================== + # First assign each new edge point coordinate its *original* edge index. + # --> Then increment those accordingly at each split point. + orig_edge_idxs = rep(edge_idxs, reps) + # Original edges are subdivided at each split point. + # Therefore, a new edge originates from each split point. + # Hence, to get the new edge indices: + # --> Increment each original edge index by 1 at each split point. + incs = integer(nrow(new_edge_coords)) # By default don't increment. + incs[which(is_split) + 1:sum(is_split)] = 1L # Add 1 after each split. + new_edge_idxs = orig_edge_idxs + cumsum(incs) + new_edge_coords$edge_id = new_edge_idxs + # Build the new edge geometries. + new_edge_geoms = sfc_linestring(new_edge_coords, linestring_id = "edge_id") + st_crs(new_edge_geoms) = st_crs(edges) + st_precision(new_edge_geoms) = st_precision(edges) + new_edge_coords$edge_id = NULL + ## =================================== + # STEP V: CONSTRUCT THE NEW EDGE DATA + # We now have the geometries of the new edges. + # However, the original edge attributes got lost. + # We will restore them by: + # --> Adding back the attributes to edges that were not split. + # --> Duplicating original attributes within splitted edges. + # Beware that from and to columns will remain unchanged at this stage. + # We will update them later. + ## =================================== + # Find which *original* edge belongs to which *new* edge: + # --> Use the lists of edge indices mapped to the new edge points. + # --> There we already mapped each new edge point to its original edge. + # --> First define which new edge points are startpoints of new edges. + # --> Then retrieve the original edge index from these new startpoints. + # --> This gives us a single original edge index for each new edge. + is_new_startpoint = !duplicated(new_edge_idxs) + orig_edge_idxs = orig_edge_idxs[is_new_startpoint] + # Duplicate original edge data whenever needed. + new_edges = edges[orig_edge_idxs, ] + # Set the new edge geometries as geometries of these new edges. + st_geometry(new_edges) = new_edge_geoms + ## ========================================== + # STEP VI: CONSTRUCT THE NEW NODE GEOMETRIES + # All split points are now boundary points of new edges. + # All edge boundaries become nodes in the network. + ## ========================================== + is_new_boundary = rep(is_split | is_boundary, reps) + new_node_geoms = sfc_point(new_edge_coords[is_new_boundary, ]) + st_crs(new_node_geoms) = st_crs(nodes) + st_precision(new_node_geoms) = st_precision(nodes) + ## ===================================== + # STEP VII: CONSTRUCT THE NEW NODE DATA + # We now have the geometries of the new nodes. + # However, the original node attributes got lost. + # We will restore them by: + # --> Adding back the attributes to nodes that were already a node before. + # --> Filling attribute values of newly added nodes with NA. + # Beware at this stage the nodes are recreated from scratch. + # That means each boundary point of the new edges is stored as separate node. + # Boundaries with equal geometries will be merged into a single node later. + ## ===================================== + # Find which of the *original* edge points equaled which *original* node. + # If an edge point did not equal a node, store NA instead. + node_idxs = rep(NA, nrow(edge_pts)) + if (directed) { + node_idxs[is_boundary] = edge_incident_ids(x) + } else { + node_idxs[is_boundary] = edge_boundary_ids(x) + } + # Find which of the *original* nodes belong to which *new* edge boundary. + # If a new edge boundary does not equal an original node, store NA instead. + orig_node_idxs = rep(node_idxs, reps)[is_new_boundary] + # Retrieve original node data for each new edge boundary. + # Rows of newly added nodes will be NA. + new_nodes = nodes[orig_node_idxs, ] + # Set the new node geometries as geometries of these new nodes. + st_geometry(new_nodes) = new_node_geoms + ## ================================================== + # STEP VIII: UPDATE FROM AND TO INDICES OF NEW EDGES + # Now we updated the node data, the node indices changes. + # Therefore we need to update the from and to columns of the edges as well. + ## ================================================== + # Define the indices of the new nodes. + # Equal geometries should get the same index. + new_node_idxs = st_match(new_node_geoms) + # Map node indices to edges. + is_source = rep(c(TRUE, FALSE), length(new_node_geoms) / 2) + new_edges$from = new_node_idxs[is_source] + new_edges$to = new_node_idxs[!is_source] + ## ============================= + # STEP IX: UPDATE THE NEW NODES + # We can now remove the duplicated node geometries from the new nodes data. + # Then, each location is represented by a single node. + ## ============================= + new_nodes = new_nodes[!duplicated(new_node_idxs), ] + ## ============================ + # STEP X: RECREATE THE NETWORK + # Use the new nodes data and the new edges data to create the new network. + ## ============================ + # Create new network. + x_new = sfnetwork_(new_nodes, new_edges, directed = directed) + # Return in a list. + list( + subdivision = x_new %preserve_network_attrs% x + ) +} diff --git a/R/subdivide.R b/R/subdivide.R index 453ec90f..f0714e26 100644 --- a/R/subdivide.R +++ b/R/subdivide.R @@ -59,7 +59,7 @@ subdivide = function(x, merge_equal = TRUE) { # Compute for each edge point a unique location index. # Edge points that are spatially equal get the same location index. edge_coords = edge_pts[names(edge_pts) %in% c("x", "y", "z", "m")] - edge_lids = st_match_df(edge_coords) + edge_lids = st_match_points_df(edge_coords, st_precision(edges)) edge_pts$lid = edge_lids # Define which edge points are not unique. has_duplicate = duplicated(edge_lids) | duplicated(edge_lids, fromLast = TRUE) diff --git a/R/utils.R b/R/utils.R index 49fc43e7..259ba43b 100644 --- a/R/utils.R +++ b/R/utils.R @@ -49,9 +49,25 @@ st_match = function(x) { match(idxs, unique(idxs)) } -st_match_df = function(x) { - x_str = do.call(paste, x) - match(x_str, unique(x_str)) +#' @importFrom sf st_geometry +#' @importFrom sfheaders sfc_to_df +st_match_points = function(x, precision = attr(x, "precision")) { + x_df = sfc_to_df(st_geometry(x)) + coords = x_df[, names(x_df) %in% c("x", "y", "z", "m")] + st_match_points_df(coords, precision = precision) +} + +st_match_points_df = function(x, precision = NULL) { + x_trim = lapply(x, round, digits = precision_to_digits(precision)) + x_concat = do.call(paste, x_trim) + match(x_concat, unique(x_concat)) +} + +#' @importFrom cli cli_abort +precision_to_digits = function(x) { + if (is.null(x) || x == 0) return (12) + if (x > 0) return (log(x, 10)) + cli_abort("Currently sfnetworks does not support negative precision") } #' Rounding of coordinates of point and linestring geometries @@ -164,9 +180,16 @@ df_to_lines = function(x_df, x_sf, id_col = "linestring_id", select = TRUE) { #' @param x An object of class \code{\link[sf]{sf}} or \code{\link[sf]{sfc}} #' with \code{LINESTRING} geometries. #' +#' @param return_df Should a data frame with one column per coordinate be +#' returned instead of a \code{\link[sf]{sfc}} object? Defaults to +#' \code{FALSE}. +#' #' @return An object of class \code{\link[sf]{sfc}} with \code{POINT} #' geometries, of length equal to twice the number of lines in x, and ordered #' as [start of line 1, end of line 1, start of line 2, end of line 2, ...]. +#' If \code{return_df = TRUE}, a data frame with one column per coordinate is +#' returned instead, with number of rows equal to twice the number of lines in +#' x. #' #' @details With boundary points we mean the points at the start and end of #' a linestring. @@ -174,12 +197,14 @@ df_to_lines = function(x_df, x_sf, id_col = "linestring_id", select = TRUE) { #' @importFrom sf st_geometry #' @importFrom sfheaders sfc_to_df #' @noRd -linestring_boundary_points = function(x) { +linestring_boundary_points = function(x, return_df = FALSE) { coords = sfc_to_df(st_geometry(x)) is_start = !duplicated(coords[["linestring_id"]]) is_end = !duplicated(coords[["linestring_id"]], fromLast = TRUE) is_bound = is_start | is_end - df_to_points(coords[is_bound, ], x) + bounds = coords[is_bound, names(coords) %in% c("x", "y", "z", "m")] + if (return_df) return (bounds) + df_to_points(bounds, x, select = FALSE) } #' Get the start points of linestring geometries @@ -187,16 +212,24 @@ linestring_boundary_points = function(x) { #' @param x An object of class \code{\link[sf]{sf}} or \code{\link[sf]{sfc}} #' with \code{LINESTRING} geometries. #' +#' @param return_df Should a data frame with one column per coordinate be +#' returned instead of a \code{\link[sf]{sfc}} object? Defaults to +#' \code{FALSE}. +#' #' @return An object of class \code{\link[sf]{sfc}} with \code{POINT} -#' geometries, of length equal to the number of lines in x. +#' geometries, of length equal to the number of lines in x. If +#' \code{return_df = TRUE}, a data frame with one column per coordinate is +#' returned instead, with number of rows equal to the number of lines in x. #' #' @importFrom sf st_geometry #' @importFrom sfheaders sfc_to_df #' @noRd -linestring_start_points = function(x) { +linestring_start_points = function(x, return_df = FALSE) { coords = sfc_to_df(st_geometry(x)) is_start = !duplicated(coords[["linestring_id"]]) - df_to_points(coords[is_start, ], x) + starts = coords[is_start, names(coords) %in% c("x", "y", "z", "m")] + if (return_df) return (starts) + df_to_points(starts, x, select = FALSE) } #' Get the end points of linestring geometries @@ -204,16 +237,24 @@ linestring_start_points = function(x) { #' @param x An object of class \code{\link[sf]{sf}} or \code{\link[sf]{sfc}} #' with \code{LINESTRING} geometries. #' +#' @param return_df Should a data frame with one column per coordinate be +#' returned instead of a \code{\link[sf]{sfc}} object? Defaults to +#' \code{FALSE}. +#' #' @return An object of class \code{\link[sf]{sfc}} with \code{POINT} -#' geometries, of length equal to the number of lines in x. +#' geometries, of length equal to the number of lines in x. If +#' \code{return_df = TRUE}, a data frame with one column per coordinate is +#' returned instead, with number of rows equal to the number of lines in x. #' #' @importFrom sf st_geometry #' @importFrom sfheaders sfc_to_df #' @noRd -linestring_end_points = function(x) { +linestring_end_points = function(x ,return_df = FALSE) { coords = sfc_to_df(st_geometry(x)) is_end = !duplicated(coords[["linestring_id"]], fromLast = TRUE) - df_to_points(coords[is_end, ], x) + ends = coords[is_end, names(coords) %in% c("x", "y", "z", "m")] + if (return_df) return (ends) + df_to_points(ends, x, select = FALSE) } #' Get the segments of linestring geometries