Skip to content

Commit

Permalink
refactor: Use a new workflow for precision handling in point equality…
Browse files Browse the repository at this point in the history
… tests. Refs #130 🚧
  • Loading branch information
luukvdmeer committed Sep 16, 2024
1 parent feedd8b commit a7c6de4
Show file tree
Hide file tree
Showing 4 changed files with 263 additions and 13 deletions.
42 changes: 42 additions & 0 deletions R/create.R
Original file line number Diff line number Diff line change
Expand Up @@ -470,6 +470,48 @@ create_from_spatial_lines = function(x, directed = TRUE,
)
}

create_from_spatial_lines_v2 = function(x, directed = TRUE,
compute_length = FALSE) {
# The provided lines will form the edges of the network.
edges = st_as_sf(x)
# Get the coordinates of the boundary points of the edges.
# These will form the nodes of the network.
node_coords = linestring_boundary_points(edges, return_df = TRUE)
# Give each unique location a unique ID.
indices = st_match_points_df(node_coords, st_precision(x))
# Convert the node coordinates into point geometry objects.
nodes = df_to_points(node_coords, x, select = FALSE)
# Define for each endpoint if it is a source or target node.
is_source = rep(c(TRUE, FALSE), length(nodes) / 2)
# Define for each edge which node is its source and target node.
if ("from" %in% colnames(edges)) raise_overwrite("from")
edges$from = indices[is_source]
if ("to" %in% colnames(edges)) raise_overwrite("to")
edges$to = indices[!is_source]
# Remove duplicated nodes from the nodes table.
nodes = nodes[!duplicated(indices)]
# Convert to sf object
nodes = st_sf(geometry = nodes)
# Use the same sf column name in the nodes as in the edges.
geom_colname = attr(edges, "sf_column")
if (geom_colname != "geometry") {
names(nodes)[1] = geom_colname
attr(nodes, "sf_column") = geom_colname
}
# Use the same class for the nodes as for the edges.
# This mainly affects the "lower level" classes.
# For example an sf tibble instead of a sf data frame.
class(nodes) = class(edges)
# Create a network out of the created nodes and the provided edges.
# Force to skip network validity tests because we already know they pass.
sfnetwork(nodes, edges,
directed = directed,
edges_as_lines = TRUE,
compute_length = compute_length,
force = TRUE
)
}

#' Create a spatial network from point geometries
#'
#' @param x An object of class \code{\link[sf]{sf}} or \code{\link[sf]{sfc}}
Expand Down
169 changes: 168 additions & 1 deletion R/morphers.R
Original file line number Diff line number Diff line change
Expand Up @@ -905,7 +905,7 @@ to_spatial_unique = function(x, summarise_attributes = "ignore",
node_geoms = st_geometry(nodes)
node_geomcol = attr(nodes, "sf_column")
# Define which nodes have equal geometries.
matches = st_match(node_geoms)
matches = st_match_points(node_geoms)
# Update the attribute summary instructions.
# During morphing tidygraph adds the tidygraph node index column.
# Since it is added internally it is not referenced in summarise_attributes.
Expand Down Expand Up @@ -941,3 +941,170 @@ to_spatial_unique = function(x, summarise_attributes = "ignore",
unique = tbg_to_sfn(x_new %preserve_network_attrs% x)
)
}


to_spatial_subdivision_orig = function(x) {
if (will_assume_constant(x)) raise_assume_constant("to_spatial_subdivision")
# Retrieve nodes and edges from the network.
nodes = nodes_as_sf(x)
edges = edges_as_sf(x)
# For later use:
# --> Check wheter x is directed.
directed = is_directed(x)
## ===========================
# STEP I: DECOMPOSE THE EDGES
# Decompose the edges linestring geometries into the points that shape them.
## ===========================
# Extract all points from the linestring geometries of the edges.
edge_pts = sf_to_df(edges)
# Extract two subsets of information:
# --> One with only the coordinates of the points
# --> Another with indices describing to which edge a point belonged.
edge_coords = edge_pts[names(edge_pts) %in% c("x", "y", "z", "m")]
edge_idxs = edge_pts$linestring_id
## =======================================
# STEP II: DEFINE WHERE TO SUBDIVIDE EDGES
# Edges should be split at locations where:
# --> An edge interior point is equal to a boundary point in another edge.
# --> An edge interior point is equal to an interior point in another edge.
# Hence, we need to split edges at point that:
# --> Are interior points.
# --> Have at least one duplicate among the other edge points.
## =======================================
# Find which of the edge points is a boundary point.
is_startpoint = !duplicated(edge_idxs)
is_endpoint = !duplicated(edge_idxs, fromLast = TRUE)
is_boundary = is_startpoint | is_endpoint
# Find which of the edge points occur more than once.
is_duplicate_desc = duplicated(edge_coords)
is_duplicate_asc = duplicated(edge_coords, fromLast = TRUE)
has_duplicate = is_duplicate_desc | is_duplicate_asc
# Split points are those edge points satisfying both of the following rules:
# --> 1) They have at least one duplicate among the other edge points.
# --> 2) They are not edge boundary points themselves.
is_split = has_duplicate & !is_boundary
if (! any(is_split)) return (x)
## ================================
# STEP III: DUPLICATE SPLIT POINTS
# The split points are currently a single interior point in an edge.
# They will become the endpoint of one edge *and* the startpoint of another.
# Hence, each split point needs to be duplicated.
## ================================
# Create the repetition vector:
# --> This defines for each edge point if it should be duplicated.
# --> A value of '1' means 'store once', i.e. don't duplicate.
# --> A value of '2' means 'store twice', i.e. duplicate.
# --> Split points will be part of two new edges and should be duplicated.
reps = rep(1L, nrow(edge_coords))
reps[is_split] = 2L
# Create the new coordinate data frame by duplicating split points.
new_edge_coords = data.frame(lapply(edge_coords, function(i) rep(i, reps)))
## ==========================================
# STEP IV: CONSTRUCT THE NEW EDGES GEOMETRIES
# With the new coords of the edge points we need to recreate linestrings.
# First we need to know which edge points belong to which *new* edge.
# Then we need to build a linestring geometry for each new edge.
## ==========================================
# First assign each new edge point coordinate its *original* edge index.
# --> Then increment those accordingly at each split point.
orig_edge_idxs = rep(edge_idxs, reps)
# Original edges are subdivided at each split point.
# Therefore, a new edge originates from each split point.
# Hence, to get the new edge indices:
# --> Increment each original edge index by 1 at each split point.
incs = integer(nrow(new_edge_coords)) # By default don't increment.
incs[which(is_split) + 1:sum(is_split)] = 1L # Add 1 after each split.
new_edge_idxs = orig_edge_idxs + cumsum(incs)
new_edge_coords$edge_id = new_edge_idxs
# Build the new edge geometries.
new_edge_geoms = sfc_linestring(new_edge_coords, linestring_id = "edge_id")
st_crs(new_edge_geoms) = st_crs(edges)
st_precision(new_edge_geoms) = st_precision(edges)
new_edge_coords$edge_id = NULL
## ===================================
# STEP V: CONSTRUCT THE NEW EDGE DATA
# We now have the geometries of the new edges.
# However, the original edge attributes got lost.
# We will restore them by:
# --> Adding back the attributes to edges that were not split.
# --> Duplicating original attributes within splitted edges.
# Beware that from and to columns will remain unchanged at this stage.
# We will update them later.
## ===================================
# Find which *original* edge belongs to which *new* edge:
# --> Use the lists of edge indices mapped to the new edge points.
# --> There we already mapped each new edge point to its original edge.
# --> First define which new edge points are startpoints of new edges.
# --> Then retrieve the original edge index from these new startpoints.
# --> This gives us a single original edge index for each new edge.
is_new_startpoint = !duplicated(new_edge_idxs)
orig_edge_idxs = orig_edge_idxs[is_new_startpoint]
# Duplicate original edge data whenever needed.
new_edges = edges[orig_edge_idxs, ]
# Set the new edge geometries as geometries of these new edges.
st_geometry(new_edges) = new_edge_geoms
## ==========================================
# STEP VI: CONSTRUCT THE NEW NODE GEOMETRIES
# All split points are now boundary points of new edges.
# All edge boundaries become nodes in the network.
## ==========================================
is_new_boundary = rep(is_split | is_boundary, reps)
new_node_geoms = sfc_point(new_edge_coords[is_new_boundary, ])
st_crs(new_node_geoms) = st_crs(nodes)
st_precision(new_node_geoms) = st_precision(nodes)
## =====================================
# STEP VII: CONSTRUCT THE NEW NODE DATA
# We now have the geometries of the new nodes.
# However, the original node attributes got lost.
# We will restore them by:
# --> Adding back the attributes to nodes that were already a node before.
# --> Filling attribute values of newly added nodes with NA.
# Beware at this stage the nodes are recreated from scratch.
# That means each boundary point of the new edges is stored as separate node.
# Boundaries with equal geometries will be merged into a single node later.
## =====================================
# Find which of the *original* edge points equaled which *original* node.
# If an edge point did not equal a node, store NA instead.
node_idxs = rep(NA, nrow(edge_pts))
if (directed) {
node_idxs[is_boundary] = edge_incident_ids(x)
} else {
node_idxs[is_boundary] = edge_boundary_ids(x)
}
# Find which of the *original* nodes belong to which *new* edge boundary.
# If a new edge boundary does not equal an original node, store NA instead.
orig_node_idxs = rep(node_idxs, reps)[is_new_boundary]
# Retrieve original node data for each new edge boundary.
# Rows of newly added nodes will be NA.
new_nodes = nodes[orig_node_idxs, ]
# Set the new node geometries as geometries of these new nodes.
st_geometry(new_nodes) = new_node_geoms
## ==================================================
# STEP VIII: UPDATE FROM AND TO INDICES OF NEW EDGES
# Now we updated the node data, the node indices changes.
# Therefore we need to update the from and to columns of the edges as well.
## ==================================================
# Define the indices of the new nodes.
# Equal geometries should get the same index.
new_node_idxs = st_match(new_node_geoms)
# Map node indices to edges.
is_source = rep(c(TRUE, FALSE), length(new_node_geoms) / 2)
new_edges$from = new_node_idxs[is_source]
new_edges$to = new_node_idxs[!is_source]
## =============================
# STEP IX: UPDATE THE NEW NODES
# We can now remove the duplicated node geometries from the new nodes data.
# Then, each location is represented by a single node.
## =============================
new_nodes = new_nodes[!duplicated(new_node_idxs), ]
## ============================
# STEP X: RECREATE THE NETWORK
# Use the new nodes data and the new edges data to create the new network.
## ============================
# Create new network.
x_new = sfnetwork_(new_nodes, new_edges, directed = directed)
# Return in a list.
list(
subdivision = x_new %preserve_network_attrs% x
)
}
2 changes: 1 addition & 1 deletion R/subdivide.R
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ subdivide = function(x, merge_equal = TRUE) {
# Compute for each edge point a unique location index.
# Edge points that are spatially equal get the same location index.
edge_coords = edge_pts[names(edge_pts) %in% c("x", "y", "z", "m")]
edge_lids = st_match_df(edge_coords)
edge_lids = st_match_points_df(edge_coords, st_precision(edges))
edge_pts$lid = edge_lids
# Define which edge points are not unique.
has_duplicate = duplicated(edge_lids) | duplicated(edge_lids, fromLast = TRUE)
Expand Down
63 changes: 52 additions & 11 deletions R/utils.R
Original file line number Diff line number Diff line change
Expand Up @@ -49,9 +49,25 @@ st_match = function(x) {
match(idxs, unique(idxs))
}

st_match_df = function(x) {
x_str = do.call(paste, x)
match(x_str, unique(x_str))
#' @importFrom sf st_geometry
#' @importFrom sfheaders sfc_to_df
st_match_points = function(x, precision = attr(x, "precision")) {
x_df = sfc_to_df(st_geometry(x))
coords = x_df[, names(x_df) %in% c("x", "y", "z", "m")]
st_match_points_df(coords, precision = precision)
}

st_match_points_df = function(x, precision = NULL) {
x_trim = lapply(x, round, digits = precision_to_digits(precision))
x_concat = do.call(paste, x_trim)
match(x_concat, unique(x_concat))
}

#' @importFrom cli cli_abort
precision_to_digits = function(x) {
if (is.null(x) || x == 0) return (12)
if (x > 0) return (log(x, 10))
cli_abort("Currently sfnetworks does not support negative precision")
}

#' Rounding of coordinates of point and linestring geometries
Expand Down Expand Up @@ -164,56 +180,81 @@ df_to_lines = function(x_df, x_sf, id_col = "linestring_id", select = TRUE) {
#' @param x An object of class \code{\link[sf]{sf}} or \code{\link[sf]{sfc}}
#' with \code{LINESTRING} geometries.
#'
#' @param return_df Should a data frame with one column per coordinate be
#' returned instead of a \code{\link[sf]{sfc}} object? Defaults to
#' \code{FALSE}.
#'
#' @return An object of class \code{\link[sf]{sfc}} with \code{POINT}
#' geometries, of length equal to twice the number of lines in x, and ordered
#' as [start of line 1, end of line 1, start of line 2, end of line 2, ...].
#' If \code{return_df = TRUE}, a data frame with one column per coordinate is
#' returned instead, with number of rows equal to twice the number of lines in
#' x.
#'
#' @details With boundary points we mean the points at the start and end of
#' a linestring.
#'
#' @importFrom sf st_geometry
#' @importFrom sfheaders sfc_to_df
#' @noRd
linestring_boundary_points = function(x) {
linestring_boundary_points = function(x, return_df = FALSE) {
coords = sfc_to_df(st_geometry(x))
is_start = !duplicated(coords[["linestring_id"]])
is_end = !duplicated(coords[["linestring_id"]], fromLast = TRUE)
is_bound = is_start | is_end
df_to_points(coords[is_bound, ], x)
bounds = coords[is_bound, names(coords) %in% c("x", "y", "z", "m")]
if (return_df) return (bounds)
df_to_points(bounds, x, select = FALSE)
}

#' Get the start points of linestring geometries
#'
#' @param x An object of class \code{\link[sf]{sf}} or \code{\link[sf]{sfc}}
#' with \code{LINESTRING} geometries.
#'
#' @param return_df Should a data frame with one column per coordinate be
#' returned instead of a \code{\link[sf]{sfc}} object? Defaults to
#' \code{FALSE}.
#'
#' @return An object of class \code{\link[sf]{sfc}} with \code{POINT}
#' geometries, of length equal to the number of lines in x.
#' geometries, of length equal to the number of lines in x. If
#' \code{return_df = TRUE}, a data frame with one column per coordinate is
#' returned instead, with number of rows equal to the number of lines in x.
#'
#' @importFrom sf st_geometry
#' @importFrom sfheaders sfc_to_df
#' @noRd
linestring_start_points = function(x) {
linestring_start_points = function(x, return_df = FALSE) {
coords = sfc_to_df(st_geometry(x))
is_start = !duplicated(coords[["linestring_id"]])
df_to_points(coords[is_start, ], x)
starts = coords[is_start, names(coords) %in% c("x", "y", "z", "m")]
if (return_df) return (starts)
df_to_points(starts, x, select = FALSE)
}

#' Get the end points of linestring geometries
#'
#' @param x An object of class \code{\link[sf]{sf}} or \code{\link[sf]{sfc}}
#' with \code{LINESTRING} geometries.
#'
#' @param return_df Should a data frame with one column per coordinate be
#' returned instead of a \code{\link[sf]{sfc}} object? Defaults to
#' \code{FALSE}.
#'
#' @return An object of class \code{\link[sf]{sfc}} with \code{POINT}
#' geometries, of length equal to the number of lines in x.
#' geometries, of length equal to the number of lines in x. If
#' \code{return_df = TRUE}, a data frame with one column per coordinate is
#' returned instead, with number of rows equal to the number of lines in x.
#'
#' @importFrom sf st_geometry
#' @importFrom sfheaders sfc_to_df
#' @noRd
linestring_end_points = function(x) {
linestring_end_points = function(x ,return_df = FALSE) {
coords = sfc_to_df(st_geometry(x))
is_end = !duplicated(coords[["linestring_id"]], fromLast = TRUE)
df_to_points(coords[is_end, ], x)
ends = coords[is_end, names(coords) %in% c("x", "y", "z", "m")]
if (return_df) return (ends)
df_to_points(ends, x, select = FALSE)
}

#' Get the segments of linestring geometries
Expand Down

0 comments on commit a7c6de4

Please sign in to comment.