refactor: Use a new workflow for precision handling in point equality…

… tests part II. Refs #130 🚧
luukvdmeer · Sep 17, 2024 · f2c8048 · f2c8048
1 parent a7c6de4
commit f2c8048
Show file tree

Hide file tree

Showing 9 changed files with 80 additions and 226 deletions.
diff --git a/R/blend.R b/R/blend.R
@@ -241,7 +241,7 @@ blend_ = function(x, y, tolerance) {
   # Remove duplicated features in y.
   # These features will have the same blending location.
   # Only one point can be blended per location.
-  is_duplicated = st_duplicated(Y)
+  is_duplicated = st_duplicated_points(Y)
   Y = Y[!is_duplicated]
   ## ==========================================
   # STEP V: INCLUDE FEATURES IN EDGE GEOMETRIES

diff --git a/R/create.R b/R/create.R
@@ -414,6 +414,11 @@ as_sfnetwork.focused_tbl_graph = function(x, ...) {
 #' in the network. Nodes are created at the line boundaries. Shared boundaries
 #' between multiple linestrings become the same node.
 #'
+#' @note By default sfnetworks rounds coordinates to 12 decimal places to
+#' determine spatial equality. You can influence this behavior by explicitly
+#' setting the precision of the linestrings using
+#' \code{\link[sf]{st_set_precision}}.
+#'
 #' @return An object of class \code{\link{sfnetwork}}.
 #'
 #' @examples
@@ -429,51 +434,12 @@ as_sfnetwork.focused_tbl_graph = function(x, ...) {
 #'
 #' par(oldpar)
 #'
-#' @importFrom sf st_as_sf st_sf
+#' @importFrom sf st_as_sf st_precision st_sf
 #' @export
 create_from_spatial_lines = function(x, directed = TRUE,
                                      compute_length = FALSE) {
   # The provided lines will form the edges of the network.
   edges = st_as_sf(x)
-  # Get the boundary points of the edges.
-  nodes = linestring_boundary_points(edges)
-  # Give each unique location a unique ID.
-  indices = st_match(nodes)
-  # Define for each endpoint if it is a source or target node.
-  is_source = rep(c(TRUE, FALSE), length(nodes) / 2)
-  # Define for each edge which node is its source and target node.
-  if ("from" %in% colnames(edges)) raise_overwrite("from")
-  edges$from = indices[is_source]
-  if ("to" %in% colnames(edges)) raise_overwrite("to")
-  edges$to = indices[!is_source]
-  # Remove duplicated nodes from the nodes table.
-  nodes = nodes[!duplicated(indices)]
-  # Convert to sf object
-  nodes = st_sf(geometry = nodes)
-  # Use the same sf column name in the nodes as in the edges.
-  geom_colname = attr(edges, "sf_column")
-  if (geom_colname != "geometry") {
-    names(nodes)[1] = geom_colname
-    attr(nodes, "sf_column") = geom_colname
-  }
-  # Use the same class for the nodes as for the edges.
-  # This mainly affects the "lower level" classes.
-  # For example an sf tibble instead of a sf data frame.
-  class(nodes) = class(edges)
-  # Create a network out of the created nodes and the provided edges.
-  # Force to skip network validity tests because we already know they pass.
-  sfnetwork(nodes, edges,
-    directed = directed,
-    edges_as_lines = TRUE,
-    compute_length = compute_length,
-    force = TRUE
-  )
-}
-
-create_from_spatial_lines_v2 = function(x, directed = TRUE,
-                                     compute_length = FALSE) {
-  # The provided lines will form the edges of the network.
-  edges = st_as_sf(x)
   # Get the coordinates of the boundary points of the edges.
   # These will form the nodes of the network.
   node_coords = linestring_boundary_points(edges, return_df = TRUE)

diff --git a/R/join.R b/R/join.R
@@ -1,8 +1,7 @@
 #' Join two spatial networks based on equality of node geometries
 #'
 #' A spatial network specific join function which makes a spatial full join on
-#' the geometries of the nodes data, based on the \code{\link[sf]{st_equals}}
-#' spatial predicate. Edge data are combined using a
+#' the geometries of the nodes data. Edge data are combined using a
 #' \code{\link[dplyr]{bind_rows}} semantic, meaning that data are matched by
 #' column name and values are filled with \code{NA} if missing in either of
 #' the networks. The \code{from} and \code{to} columns in the edge data are
@@ -15,6 +14,11 @@
 #'
 #' @param ... Arguments passed on to \code{\link[tidygraph]{graph_join}}.
 #'
+#' @note By default sfnetworks rounds coordinates to 12 decimal places to
+#' determine spatial equality. You can influence this behavior by explicitly
+#' setting the precision of the networks using
+#' \code{\link[sf]{st_set_precision}}.
+#'
 #' @return The joined networks as an object of class \code{\link{sfnetwork}}.
 #'
 #' @examples
@@ -86,7 +90,7 @@ spatial_join_network = function(x, y, ...) {
   N_x = vertex_attr(x, x_geomcol)
   N_y = vertex_attr(y, y_geomcol)
   N = c(N_x, N_y)
-  uid = st_match(N)
+  uid = st_match_points(N)
   # Store the unique node indices as node attributes in both x and y.
   if (".sfnetwork_index" %in% c(vertex_attr_names(x), vertex_attr_names(y))) {
     raise_reserved_attr(".sfnetwork_index")

diff --git a/R/morphers.R b/R/morphers.R
@@ -838,7 +838,10 @@ to_spatial_smooth = function(x,
 #' be merged into a single node, and should subdivision points at the same
 #' as an existing node be merged into that node? Defaults to \code{TRUE}. If
 #' set to \code{FALSE}, each subdivision point is added separately as a new
-#' node to the network.
+#' node to the network. By default sfnetworks rounds coordinates to 12 decimal
+#' places to determine spatial equality. You can influence this behavior by
+#' explicitly setting the precision of the network using
+#' \code{\link[sf]{st_set_precision}}.
 #'
 #' @export
 to_spatial_subdivision = function(x, merge_equal = TRUE) {
@@ -890,7 +893,10 @@ to_spatial_transformed = function(x, ...) {
 
 #' @describeIn spatial_morphers Merge nodes with equal geometries into a single
 #' node. Returns a \code{morphed_sfnetwork} containing a single element of
-#' class \code{\link{sfnetwork}}.
+#' class \code{\link{sfnetwork}}. By default sfnetworks rounds coordinates to
+#' 12 decimal places to determine spatial equality. You can influence this
+#' behavior by explicitly setting the precision of the network using
+#' \code{\link[sf]{st_set_precision}}.
 #'
 #' @importFrom igraph contract delete_vertex_attr
 #' @importFrom sf st_as_sf st_geometry
@@ -941,170 +947,3 @@ to_spatial_unique = function(x, summarise_attributes = "ignore",
     unique = tbg_to_sfn(x_new %preserve_network_attrs% x)
   )
 }
-
-
-to_spatial_subdivision_orig = function(x) {
-  if (will_assume_constant(x)) raise_assume_constant("to_spatial_subdivision")
-  # Retrieve nodes and edges from the network.
-  nodes = nodes_as_sf(x)
-  edges = edges_as_sf(x)
-  # For later use:
-  # --> Check wheter x is directed.
-  directed = is_directed(x)
-  ## ===========================
-  # STEP I: DECOMPOSE THE EDGES
-  # Decompose the edges linestring geometries into the points that shape them.
-  ## ===========================
-  # Extract all points from the linestring geometries of the edges.
-  edge_pts = sf_to_df(edges)
-  # Extract two subsets of information:
-  # --> One with only the coordinates of the points
-  # --> Another with indices describing to which edge a point belonged.
-  edge_coords = edge_pts[names(edge_pts) %in% c("x", "y", "z", "m")]
-  edge_idxs = edge_pts$linestring_id
-  ## =======================================
-  # STEP II: DEFINE WHERE TO SUBDIVIDE EDGES
-  # Edges should be split at locations where:
-  # --> An edge interior point is equal to a boundary point in another edge.
-  # --> An edge interior point is equal to an interior point in another edge.
-  # Hence, we need to split edges at point that:
-  # --> Are interior points.
-  # --> Have at least one duplicate among the other edge points.
-  ## =======================================
-  # Find which of the edge points is a boundary point.
-  is_startpoint = !duplicated(edge_idxs)
-  is_endpoint = !duplicated(edge_idxs, fromLast = TRUE)
-  is_boundary = is_startpoint | is_endpoint
-  # Find which of the edge points occur more than once.
-  is_duplicate_desc = duplicated(edge_coords)
-  is_duplicate_asc = duplicated(edge_coords, fromLast = TRUE)
-  has_duplicate = is_duplicate_desc | is_duplicate_asc
-  # Split points are those edge points satisfying both of the following rules:
-  # --> 1) They have at least one duplicate among the other edge points.
-  # --> 2) They are not edge boundary points themselves.
-  is_split = has_duplicate & !is_boundary
-  if (! any(is_split)) return (x)
-  ## ================================
-  # STEP III: DUPLICATE SPLIT POINTS
-  # The split points are currently a single interior point in an edge.
-  # They will become the endpoint of one edge *and* the startpoint of another.
-  # Hence, each split point needs to be duplicated.
-  ## ================================
-  # Create the repetition vector:
-  # --> This defines for each edge point if it should be duplicated.
-  # --> A value of '1' means 'store once', i.e. don't duplicate.
-  # --> A value of '2' means 'store twice', i.e. duplicate.
-  # --> Split points will be part of two new edges and should be duplicated.
-  reps = rep(1L, nrow(edge_coords))
-  reps[is_split] = 2L
-  # Create the new coordinate data frame by duplicating split points.
-  new_edge_coords = data.frame(lapply(edge_coords, function(i) rep(i, reps)))
-  ## ==========================================
-  # STEP IV: CONSTRUCT THE NEW EDGES GEOMETRIES
-  # With the new coords of the edge points we need to recreate linestrings.
-  # First we need to know which edge points belong to which *new* edge.
-  # Then we need to build a linestring geometry for each new edge.
-  ## ==========================================
-  # First assign each new edge point coordinate its *original* edge index.
-  # --> Then increment those accordingly at each split point.
-  orig_edge_idxs = rep(edge_idxs, reps)
-  # Original edges are subdivided at each split point.
-  # Therefore, a new edge originates from each split point.
-  # Hence, to get the new edge indices:
-  # --> Increment each original edge index by 1 at each split point.
-  incs = integer(nrow(new_edge_coords)) # By default don't increment.
-  incs[which(is_split) + 1:sum(is_split)] = 1L # Add 1 after each split.
-  new_edge_idxs = orig_edge_idxs + cumsum(incs)
-  new_edge_coords$edge_id = new_edge_idxs
-  # Build the new edge geometries.
-  new_edge_geoms = sfc_linestring(new_edge_coords, linestring_id = "edge_id")
-  st_crs(new_edge_geoms) = st_crs(edges)
-  st_precision(new_edge_geoms) = st_precision(edges)
-  new_edge_coords$edge_id = NULL
-  ## ===================================
-  # STEP V: CONSTRUCT THE NEW EDGE DATA
-  # We now have the geometries of the new edges.
-  # However, the original edge attributes got lost.
-  # We will restore them by:
-  # --> Adding back the attributes to edges that were not split.
-  # --> Duplicating original attributes within splitted edges.
-  # Beware that from and to columns will remain unchanged at this stage.
-  # We will update them later.
-  ## ===================================
-  # Find which *original* edge belongs to which *new* edge:
-  # --> Use the lists of edge indices mapped to the new edge points.
-  # --> There we already mapped each new edge point to its original edge.
-  # --> First define which new edge points are startpoints of new edges.
-  # --> Then retrieve the original edge index from these new startpoints.
-  # --> This gives us a single original edge index for each new edge.
-  is_new_startpoint = !duplicated(new_edge_idxs)
-  orig_edge_idxs = orig_edge_idxs[is_new_startpoint]
-  # Duplicate original edge data whenever needed.
-  new_edges = edges[orig_edge_idxs, ]
-  # Set the new edge geometries as geometries of these new edges.
-  st_geometry(new_edges) = new_edge_geoms
-  ## ==========================================
-  # STEP VI: CONSTRUCT THE NEW NODE GEOMETRIES
-  # All split points are now boundary points of new edges.
-  # All edge boundaries become nodes in the network.
-  ## ==========================================
-  is_new_boundary = rep(is_split | is_boundary, reps)
-  new_node_geoms = sfc_point(new_edge_coords[is_new_boundary, ])
-  st_crs(new_node_geoms) = st_crs(nodes)
-  st_precision(new_node_geoms) = st_precision(nodes)
-  ## =====================================
-  # STEP VII: CONSTRUCT THE NEW NODE DATA
-  # We now have the geometries of the new nodes.
-  # However, the original node attributes got lost.
-  # We will restore them by:
-  # --> Adding back the attributes to nodes that were already a node before.
-  # --> Filling attribute values of newly added nodes with NA.
-  # Beware at this stage the nodes are recreated from scratch.
-  # That means each boundary point of the new edges is stored as separate node.
-  # Boundaries with equal geometries will be merged into a single node later.
-  ## =====================================
-  # Find which of the *original* edge points equaled which *original* node.
-  # If an edge point did not equal a node, store NA instead.
-  node_idxs = rep(NA, nrow(edge_pts))
-  if (directed) {
-    node_idxs[is_boundary] = edge_incident_ids(x)
-  } else {
-    node_idxs[is_boundary] = edge_boundary_ids(x)
-  }
-  # Find which of the *original* nodes belong to which *new* edge boundary.
-  # If a new edge boundary does not equal an original node, store NA instead.
-  orig_node_idxs = rep(node_idxs, reps)[is_new_boundary]
-  # Retrieve original node data for each new edge boundary.
-  # Rows of newly added nodes will be NA.
-  new_nodes = nodes[orig_node_idxs, ]
-  # Set the new node geometries as geometries of these new nodes.
-  st_geometry(new_nodes) = new_node_geoms
-  ## ==================================================
-  # STEP VIII: UPDATE FROM AND TO INDICES OF NEW EDGES
-  # Now we updated the node data, the node indices changes.
-  # Therefore we need to update the from and to columns of the edges as well.
-  ## ==================================================
-  # Define the indices of the new nodes.
-  # Equal geometries should get the same index.
-  new_node_idxs = st_match(new_node_geoms)
-  # Map node indices to edges.
-  is_source = rep(c(TRUE, FALSE), length(new_node_geoms) / 2)
-  new_edges$from = new_node_idxs[is_source]
-  new_edges$to = new_node_idxs[!is_source]
-  ## =============================
-  # STEP IX: UPDATE THE NEW NODES
-  # We can now remove the duplicated node geometries from the new nodes data.
-  # Then, each location is represented by a single node.
-  ## =============================
-  new_nodes = new_nodes[!duplicated(new_node_idxs), ]
-  ## ============================
-  # STEP X: RECREATE THE NETWORK
-  # Use the new nodes data and the new edges data to create the new network.
-  ## ============================
-  # Create new network.
-  x_new = sfnetwork_(new_nodes, new_edges, directed = directed)
-  # Return in a list.
-  list(
-    subdivision = x_new %preserve_network_attrs% x
-  )
-}
diff --git a/R/subdivide.R b/R/subdivide.R
@@ -13,7 +13,10 @@
 #' be merged into a single node, and should subdivision points at the same
 #' as an existing node be merged into that node? Defaults to \code{TRUE}. If
 #' set to \code{FALSE}, each subdivision point is added separately as a new
-#' node to the network.
+#' node to the network. By default sfnetworks rounds coordinates to 12 decimal
+#' places to determine spatial equality. You can influence this behavior by
+#' explicitly setting the precision of the network using
+#' \code{\link[sf]{st_set_precision}}.
 #'
 #' @returns A subdivision of x as object of class \code{\link{sfnetwork}}.
 #'

diff --git a/R/utils.R b/R/utils.R
@@ -24,6 +24,20 @@ st_duplicated = function(x) {
   dup
 }
 
+#' @importFrom sf st_geometry
+#' @importFrom sfheaders sfc_to_df
+st_duplicated_points = function(x, precision = attr(x, "precision")) {
+  x_df = sfc_to_df(st_geometry(x))
+  coords = x_df[, names(x_df) %in% c("x", "y", "z", "m")]
+  st_duplicated_points_df(coords, precision = precision)
+}
+
+st_duplicated_points_df = function(x, precision = NULL) {
+  x_trim = lapply(x, round, digits = precision_digits(precision))
+  x_concat = do.call(paste, x_trim)
+  duplicated(x_concat)
+}
+
 #' Geometry matching
 #'
 #' @param x An object of class \code{\link[sf]{sf}} or \code{\link[sf]{sfc}}.
@@ -58,18 +72,11 @@ st_match_points = function(x, precision = attr(x, "precision")) {
 }
 
 st_match_points_df = function(x, precision = NULL) {
-  x_trim = lapply(x, round, digits = precision_to_digits(precision))
+  x_trim = lapply(x, round, digits = precision_digits(precision))
   x_concat = do.call(paste, x_trim)
   match(x_concat, unique(x_concat))
 }
 
-#' @importFrom cli cli_abort
-precision_to_digits = function(x) {
-  if (is.null(x) || x == 0) return (12)
-  if (x > 0) return (log(x, 10))
-  cli_abort("Currently sfnetworks does not support negative precision")
-}
-
 #' Rounding of coordinates of point and linestring geometries
 #'
 #' @param x An object of class \code{\link[sf]{sf}} or \code{\link[sf]{sfc}}.
@@ -425,6 +432,24 @@ merge_mranges = function(a, b) {
   ab
 }
 
+#' Infer the number of decimal places from a fixed precision scale factor
+#'
+#' @param x A fixed precision scale factor.
+#'
+#' @details For more information on fixed precision scale factors see
+#' \code{\link[sf]{st_as_binary}}. When the precision scale factor is 0
+#' or not defined, sfnetworks defaults to 12 decimal places.
+#'
+#' @return A numeric value specifying the number of decimal places.
+#'
+#' @importFrom cli cli_abort
+#' @noRd
+precision_digits = function(x) {
+  if (is.null(x) || x == 0) return (12)
+  if (x > 0) return (log(x, 10))
+  cli_abort("Currently sfnetworks does not support negative precision")
+}
+
 #' List-column friendly version of bind_rows
 #'
 #' @param ... Tables to be row-binded.

diff --git a/man/create_from_spatial_lines.Rd b/man/create_from_spatial_lines.Rd