From fc0282c597289b307049f0815cd4ec3bf71aa720 Mon Sep 17 00:00:00 2001 From: Evan Blake Date: Thu, 4 Nov 2021 18:51:15 -0500 Subject: [PATCH 01/14] Initial commit, adding deskew.py --- vision/Object Detection/deskew.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 vision/Object Detection/deskew.py diff --git a/vision/Object Detection/deskew.py b/vision/Object Detection/deskew.py new file mode 100644 index 0000000..e69de29 From d9435935001c354c3f29cf820d9576f6c645d3bd Mon Sep 17 00:00:00 2001 From: Evan Blake Date: Thu, 4 Nov 2021 18:58:06 -0500 Subject: [PATCH 02/14] Initial commit, adding deskew.py --- vision/Object Detection/deskew.py | 150 ++++++++++++++++++++++++++++++ 1 file changed, 150 insertions(+) diff --git a/vision/Object Detection/deskew.py b/vision/Object Detection/deskew.py index e69de29..07a54c2 100644 --- a/vision/Object Detection/deskew.py +++ b/vision/Object Detection/deskew.py @@ -0,0 +1,150 @@ +import cv2 +import numpy as np +import imutils + + +def resize(img, scale): + new_width = int(img.shape[1] * scale) + new_height = int(img.shape[0] * scale) + dimensions = (new_width, new_height) + return cv2.resize(img, dimensions) + + +# This gets the actual angle of the edge of the camera view; this can be derived using a square pyramid with height 1 +def edge_angle(horizontal_angle, vertical_angle): + return 2 * np.arctan(np.tan(horizontal_angle / 2) * np.cos(vertical_angle / 2)) + + +def calculate_length(horizontal_angle, vertical_angle): + return 2 * 1/(np.cos(vertical_angle / 2)) * np.tan(horizontal_angle / 2) + + +def partial_deskew(image, camera_data): + # Save the original size and aspect ratio for later + og_width = image.shape[1] + og_height = image.shape[0] + og_aspect_ratio = og_width / og_height + + camera_pitch = camera_data.get('camera_pitch') + + # If only one FOV is given, calculate the missing FOV using the aspect ratio and the existing FOV + if 'fov_horizontal' not in camera_data: + fov_vertical = 
camera_data.get('fov_vertical') + + fov_horizontal = 2 * np.arctan(og_aspect_ratio * np.tan(fov_vertical / 2)) + elif 'fov_vertical' not in camera_data: + fov_horizontal = camera_data.get('fov_horizontal') + + fov_vertical = 2 * np.arctan(np.tan(fov_horizontal / 2) / og_aspect_ratio) + else: + fov_horizontal = camera_data.get('fov_horizontal') + fov_vertical = camera_data.get('fov_vertical') + + # Calculate the actual top and bottom FOV of the camera + fov_edge = edge_angle(fov_horizontal, fov_vertical) + + # Calculate the ratio of the lengths of the top and bottom of the image + top = np.cos(camera_pitch + fov_vertical/2) + bottom = np.cos(camera_pitch - fov_vertical/2) + + # You can use either bottom/top or top/bottom depending on which way you want to transform + distance_ratio = bottom / top + + # Add padding to the image so you can distort outside the image + padding_size = int(((distance_ratio * image.shape[1]) - image.shape[1])/2) + image = cv2.copyMakeBorder(image, 0, 0, padding_size, padding_size, cv2.BORDER_CONSTANT, None, value=0) + + # Find the length of the top of the image and the distance between the top and the bottom + top_length = 2 * 1/(np.cos(camera_pitch + fov_vertical/2)) * np.tan(fov_edge/2) + center_length = np.tan(camera_pitch + fov_vertical/2) - np.tan(camera_pitch - fov_vertical/2) + + # Divide the lengths to get the aspect ratio of the final image + aspect_ratio = top_length / center_length + + # Set the dimensions to match the aspect ratio + # height = int(image.shape[1] / aspect_ratio) + # width = image.shape[1] + + height = image.shape[0] + width = int(image.shape[0] * aspect_ratio) + + # Define the input and output points for the transformation + input_pts = np.float32([[padding_size, 0], + [padding_size + og_width, 0], + [0, image.shape[0] - 1], + [image.shape[1], image.shape[0] - 1]]) + output_pts = np.float32([[0, 0], + [width - 1, 0], + [0, height - 1], + [width - 1, height - 1]]) + + # Use the points to get the transformation 
matrix and use the matrix to transform the image + matrix = cv2.getPerspectiveTransform(input_pts, output_pts) + result = cv2.warpPerspective(image, matrix, (width, height), flags=cv2.INTER_LINEAR) + + return result + + +def deskew(image, camera_data): + aspect_ratio = image.shape[1] / image.shape[0] + + camera_pitch = camera_data.get('camera_pitch') + camera_pitch = np.deg2rad(camera_pitch) + + # If only one FOV is given, calculate the missing FOV using the aspect ratio and the existing FOV + if 'fov_horizontal' not in camera_data: + fov_vertical = camera_data.get('fov_vertical') + fov_vertical = np.deg2rad(fov_vertical) + + fov_horizontal = 2 * np.arctan(aspect_ratio * np.tan(fov_vertical / 2)) + elif 'fov_vertical' not in camera_data: + fov_horizontal = camera_data.get('fov_horizontal') + fov_horizontal = np.deg2rad(fov_horizontal) + + fov_vertical = 2 * np.arctan(np.tan(fov_horizontal / 2) / aspect_ratio) + else: + fov_horizontal = camera_data.get('fov_horizontal') + fov_vertical = camera_data.get('fov_vertical') + + fov_horizontal = np.deg2rad(fov_horizontal) + fov_vertical = np.deg2rad(fov_vertical) + + # Do a partial deskew of the image to deskew based on the pitch, leaving only the roll + image = partial_deskew(image, {'camera_pitch': camera_pitch, 'fov_horizontal': fov_horizontal}) + + if 'camera_roll' in camera_data and camera_data.get('camera_roll') != 0: + camera_roll = camera_data.get('camera_roll') + camera_roll = np.deg2rad(camera_roll) + + # Rotate 90 degrees and add padding to create a new perspective view with the original roll as the pitch + image = imutils.rotate_bound(image, 90) + + # Calculate the padding needed to create the new perspective image - this is based on trig distances + top_distance = np.tan(camera_pitch + fov_vertical / 2) + bottom_distance = np.tan(camera_pitch - fov_vertical / 2) + image_length = top_distance - bottom_distance + conversion_rate = image.shape[1] / image_length + + bottom_distance = int(conversion_rate * 
bottom_distance) + + padding_size = 2 * bottom_distance + image.shape[1] + + flipped = cv2.flip(image, 1) + + image = cv2.copyMakeBorder(image, 0, 0, padding_size, 0, cv2.BORDER_CONSTANT, None, value=0) + + # image[0:flipped.shape[0], 0:flipped.shape[1]] = flipped + + # cv2.imwrite("output.png", image) + + image = partial_deskew(image, {'camera_pitch': camera_roll, 'fov_horizontal': camera_pitch * 2 + fov_horizontal}) + + image = imutils.rotate_bound(image, -90) + # image = image[0:int(image.shape[0] / 2) + 1] + + gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) + coords = cv2.findNonZero(gray) # Find all non-zero points (text) + x, y, w, h = cv2.boundingRect(coords) # Find minimum spanning bounding box + image = image[y:y + h, x:x + w] # Crop the image + + return image From b76d8d41ce0e6a18dd9c923ff139b1fe2b56d391 Mon Sep 17 00:00:00 2001 From: EnderDude67 Date: Thu, 19 May 2022 16:34:36 -0500 Subject: [PATCH 03/14] Added current work --- vision/coordinate_lengths.py | 51 +++++++ vision/vector_deskew.py | 120 +++++++++++++++ vision/vector_mapping.py | 29 ++++ vision/vector_utils.py | 287 +++++++++++++++++++++++++++++++++++ 4 files changed, 487 insertions(+) create mode 100644 vision/coordinate_lengths.py create mode 100644 vision/vector_deskew.py create mode 100644 vision/vector_mapping.py create mode 100644 vision/vector_utils.py diff --git a/vision/coordinate_lengths.py b/vision/coordinate_lengths.py new file mode 100644 index 0000000..737a5c3 --- /dev/null +++ b/vision/coordinate_lengths.py @@ -0,0 +1,51 @@ +import numpy as np + + +def latitude_length(latitude: float) -> float: + """ + Returns the distance in meters of one degree of latitude at a particular longitude + + Parameter + --------- + latitude : float + The latitude in degrees + + References + ---------- + https://en.wikipedia.org/wiki/Geographic_coordinate_system#Length_of_a_degree + """ + + # Convert to radians for trig functions + latitude = np.deg2rad(latitude) + + distance: float = 111132.92 - 
559.82 * np.cos(2 * latitude) + 1.175 * np.cos(4 * latitude)\ + - 0.0023 * np.cos(6 * latitude) + + return distance + + +def longitude_length(latitude: float) -> float: + """ + Calculates the distance in meters of one degree of longitude at that longitude + + Parameter + --------- + latitude : float + The latitude in degrees + + Returns + ------- + + + References + ---------- + https://en.wikipedia.org/wiki/Geographic_coordinate_system#Length_of_a_degree + """ + + # Convert degrees to radians for trig functions + latitude = np.deg2rad(latitude) + + distance: float = 111412.84 * np.cos(latitude) - 93.5 * np.cos(3 * latitude) \ + + 0.118 * np.cos(5 * latitude) + + return distance diff --git a/vision/vector_deskew.py b/vision/vector_deskew.py new file mode 100644 index 0000000..2aff284 --- /dev/null +++ b/vision/vector_deskew.py @@ -0,0 +1,120 @@ +import numpy as np +import numpy.typing as npt +import cv2 +import mavsdk + +import coordinate_lengths +from vector_utils import * + + +def poly_area(coordinates): + x = coordinates[:, 0] + y = coordinates[:, 1] + i = np.arange(len(x)) + + # An implementation of the shoelace algorithm using numpy functions + area = np.abs(np.sum(x[i - 1] * y[i] - x[i] * y[i - 1]) * 0.5) + + return area + + +def pixel_distance(pixel1, pixel2, image_shape, focal_length, attitude, position): + height = position.relative_altitude_m + + intersect1 = pixel_intersect(pixel1, image_shape, focal_length, attitude, height) + intersect2 = pixel_intersect(pixel2, image_shape, focal_length, attitude, height) + + # Calculate the distance between the two intersects + distance = np.linalg.norm(intersect1 - intersect2) + + distance = 3.28084 * distance # convert meters to feet? 
+ + return distance + + +def pixel_coords(pixel, image_shape, focal_length, attitude, position): + height = position.relative_altitude_m + + # Calculate the latitude and longitude lengths (in meters) + latitude_length = coordinate_lengths.latitude_length(position.latitude_deg) + longitude_length = coordinate_lengths.longitude_length(position.latitude_deg) + + # Find the pixel's intersect with the ground to get the location relative to the drone + intersect = pixel_intersect(pixel, image_shape, focal_length, attitude, height) + + # Invert the X axis so that the longitude is correct + intersect[1] *= -1 + + # Convert the location to latitude and longitude and add it to the drone's coordinates + pixel_lat = position.latitude_deg + intersect[0] / latitude_length + pixel_lon = position.longitude_deg + intersect[1] / longitude_length + + return pixel_lat, pixel_lon + + +def vector_deskew(image, focal_length, attitude: mavsdk.telemetry.EulerAngle, area_scale=1, + interpolation=cv2.INTER_NEAREST): + orig_height, orig_width, _ = image.shape + + src_pts = np.float32( + [ + [0, 0], + [orig_width, 0], + [orig_width, orig_height], + [0, orig_height] + ] + ) + + # Convert XY to YX + flipped = np.flip(src_pts, axis=1) + + intersects = np.float32([pixel_intersect(point, image.shape, focal_length, attitude) + for point in flipped]) + + # Flip the endpoints over the X axis (top left is 0,0 for images) + intersects[:, 1] *= -1 + + # Subtract the minimum on both axes so the minimum values on each axis are 0 + intersects = intersects - intersects.min(axis=0) + + # Find the area of the resulting shape + area = poly_area(intersects) + + # Scale the output so the area of the important pixels is about the same as the starting image + target_area = image.shape[0] * image.shape[1] * area_scale + scale = np.sqrt(target_area / area) + dst_pts = intersects * scale + + matrix = cv2.getPerspectiveTransform(src_pts, dst_pts) + + result_height = int(dst_pts[:, 1].max()) + 1 + result_width = 
int(dst_pts[:, 0].max()) + 1 + + result = cv2.warpPerspective( + image, matrix, (result_width, result_height), flags=interpolation, + borderMode=cv2.BORDER_TRANSPARENT + ) + + return result + + +def main(): + # coords = pixel_coords( + # [720/2, 1080/2], [720, 1080, 3], 10, + # mavsdk.telemetry.EulerAngle(0, -45, 180, 0), + # mavsdk.telemetry.Position(0, 0, None, 100000) + # ) + # + # print(np.around(coords, decimals=8)) + + image = cv2.imread("render2.png") + # print(type(image.shape)) + # image = np.dstack((image, np.full(image.shape[:2], 255))) + # image = feather_edges(image, 100) + image = vector_deskew(image, 10, mavsdk.telemetry.EulerAngle(0, -30, -30, 0)) + + # image = image[:, :, :3] + + +if __name__ == "__main__": + main() diff --git a/vision/vector_mapping.py b/vision/vector_mapping.py new file mode 100644 index 0000000..74161af --- /dev/null +++ b/vision/vector_mapping.py @@ -0,0 +1,29 @@ +import numpy as np +import cv2 + + +def feather_edges(image, blur_amount): + mask = np.full(np.array(image.shape[:2]) - blur_amount, 255) + mask = cv2.copyMakeBorder(mask, + int(blur_amount / 2), + round(blur_amount / 2), + int(blur_amount / 2), + round(blur_amount / 2), + cv2.BORDER_CONSTANT, + value=[0, 0, 0, 0]) + mask = cv2.blur(mask, [blur_amount, blur_amount]) + + image[:, :, 3] = mask + + return image + + +def alpha_over(foreground, background): + back_alpha = np.expand_dims(background[:, :, 3] / 255, axis=2) + fore_alpha = np.expand_dims(foreground[:, :, 3] / 255, axis=2) + + foreground[:, :, 3] = np.max((background[:, :, 3], foreground[:, :, 3])) + + background = fore_alpha * foreground + back_alpha * background * (1 - fore_alpha) + + return background \ No newline at end of file diff --git a/vision/vector_utils.py b/vision/vector_utils.py new file mode 100644 index 0000000..0244dcb --- /dev/null +++ b/vision/vector_utils.py @@ -0,0 +1,287 @@ +import numpy.typing as npt +from typing import List, Tuple, Optional +import numpy as np +from 
scipy.spatial.transform import Rotation as R +import mavsdk + +# Sony RX100 vii sensor size +SENSOR_WIDTH = 13.2 +SENSOR_HEIGHT = 8.8 + + +def get_fov(focal_length: float, sensor_size: float) -> float: + """ + Converts a given focal length and sensor length to the corresponding field of view in radians + + Parameters + ---------- + focal_length : float + The focal length of the camera in millimeters + sensor_size: + The sensor size along one axis in millimeters + + Returns + ------- + fov : float + The field of view in radians + """ + + return 2 * np.arctan(sensor_size / (2 * focal_length)) + + +def focal_length_to_fovs( + focal_length: float, + sensor_size: Optional[Tuple[float, float]] = (SENSOR_WIDTH, SENSOR_HEIGHT) +) -> Tuple[float, float]: + """ + Converts a given focal length to the horizontal and vertical fields of view in radians + + Parameters + ---------- + focal_length: float + The focal length of the camera in millimeters + sensor_size: Optional[Tuple[float, float]] + The dimensions (width, height) of the sensor. 
Defaults to SENSOR_WIDTH and SENSOR_HEIGHT, + which are set to 13.2 and 8.8 respectively, the size of the sensor in the Sony RX100 vii + Returns + ------- + fields_of_view : Tuple[float, float] + The horizontal and vertical fields of view in radians + """ + return get_fov(focal_length, sensor_size[0]), get_fov(focal_length, sensor_size[1]) + + +# This gets the actual angle of the edge of the camera view; this can be derived using a square +# pyramid with height 1 +def edge_angle(horizontal_angle: float, vertical_angle: float) -> float: + """ + Finds the angle needed to rotate + + Parameters + ---------- + horizontal_angle + vertical_angle + + Returns + ------- + + """ + return np.arctan(np.tan(horizontal_angle) * np.cos(vertical_angle)) + + +# Calculates the other angle if one FOV is known and the other isn't (DELETE THIS) +def find_angle(angle: float, aspect_ratio: float) -> float: + return 2 * np.arctan(aspect_ratio * np.tan(angle / 2)) + + +def plane_collision( + ray_direction: npt.NDArray[np.float64], + height: float = 1, + epsilon: float = 1e-6 +) -> npt.NDArray[np.float64]: + """ + Returns the point where a ray intersects the XY plane + + Parameters + ---------- + ray_direction : npt.NDArray[np.float64] + XYZ coordinates that represent the direction a ray faces from (0, 0, 0) + height : float + The Z coordinate for the starting height of the ray; can be any units + epsilon : float + Minimum value for the dot product of the ray direction and plane normal + + Raises + ------ + RuntimeError: "no intersection or line is parallel to plane" + Occurs when the ray direction is facing away from or parallel to the plane + + References + ---------- + http://rosettacode.org/wiki/Find_the_intersection_of_a_line_with_a_plane#Python + """ + + # Define the direction of the side face of the plane (In this case, facing upwards towards +Z) + plane_normal: npt.NDArray[np.float64] = np.array([0, 0, 1]) + + plane_point: npt.NDArray[np.float64] = np.array([0, 0, 0]) # Any point on 
the plane + ray_point: npt.NDArray[np.float64] = np.array([0, 0, height]) # Origin point of the ray + + ndotu: np.float64 = plane_normal.dot(ray_direction) + + # Checks to make sure the ray is pointing into the plane + if -ndotu < epsilon: + raise RuntimeError("no intersection or line is parallel to plane") + + # I didn't make this math but it works + w: npt.NDArray[np.int64] = ray_point - plane_point + si: np.float64 = -plane_normal.dot(w) / ndotu + psi: npt.NDArray[np.float64] = w + si * ray_direction + plane_point + + psi = np.delete(psi, -1) # Remove the Z coordinate since it's always 0 + return psi + + +def euler_rotate( + vector: npt.NDArray[np.float64], + rotation: List[float] +) -> npt.NDArray[np.float64]: + """ + Rotates a vector based on a given roll, pitch, and yaw. + + Follows the MAVSDK.EulerAngle convention - positive roll is banking to the right, positive + pitch is pitching nose up, positive yaw is clock-wise seen from above. + + Parameters + ---------- + vector: npt.NDArray[np.float64] + A vector represented by an XYZ coordinate that will be rotated + rotation: List[float] + The [roll, pitch, yaw] rotation in radians + Returns + ------- + rotated_vector : npt.NDArray[np.float64] + The vector which has been rotated + """ + + # Reverse the Y and Z rotation to match MAVSDK convention + rotation[1] *= -1 + rotation[2] *= -1 + + return R.from_euler('xyz', rotation).apply(vector) + + +def camera_vector(h_angle: float, v_angle: float) -> npt.NDArray[np.float64]: + """ + Generates a vector with an angle h_angle with the horizontal and an angle v_angle with the + vertical. + + Using camera fovs will generate a vector that represents the corner of the camera's view. 
+ + Parameters + ---------- + h_angle : float + The angle in radians to rotate horizontally + v_angle : float + The angle in radians to rotate vertically + Returns + ------- + camera_vector : npt.NDArray[np.float64] + The vector which represents a given location in an image + """ + + # Calculate the vertical rotation needed for the final vector to have the desired direction + edge: float = edge_angle(v_angle, h_angle) + + vector: npt.NDArray[np.float64] = np.array([1, 0, 0], dtype=np.float64) + return euler_rotate(vector, [0, edge, -h_angle]) + + +def pixel_angle(fov: float, ratio: float) -> float: + """ + Calculates a pixel's angle from the center of the camera on a single axis. Analogous to the + pixel's "fov" + + Only one component of the pixel is used here, call this function for each X and Y + + Parameters + ---------- + fov : float + The field of view of the camera in radians olong a given axis + ratio : float + The pixel's position as a ratio of the coordinate to the length of the image + Example: For an image that is 1080 pixels wide, a pixel at position 270 would have a + ratio of 0.25 + + Returns + ------- + angle : float + The pixel's angle from the center of the camera along a single axis + """ + return np.arctan(np.tan(fov / 2) * (1 - 2 * ratio)) + + +def pixel_vector( + pixel: Tuple[int, int], + image_shape: Tuple[int, int, int], + focal_length: float +) -> npt.NDArray[np.float64]: + """ + Generates a vector representing the given pixel. + Pixels are in row-major form [Y, X] to match numpy indexing. 
+ + Parameters + ---------- + pixel : Tuple[int, int] + The coordinates of the pixel in [Y, X] form + image_shape : Tuple[int, int, int] + The shape of the image (returned by image.shape when image is a numpy image array) + focal_length : float + The camera's focal length - used to generate the camera's fields of view + + Returns + ------- + pixel_vector : npt.NDArray[np.float64] + The vector that represents the direction of the given pixel + """ + + # Find the FOVs using the focal length + fov_h: float + fov_v: float + fov_h, fov_v = focal_length_to_fovs(focal_length) + + return camera_vector( + pixel_angle(fov_h, pixel[1] / image_shape[1]), + pixel_angle(fov_v, pixel[0] / image_shape[0]) + ) + + +def pixel_intersect( + pixel: Tuple[int, int], + image_shape: Tuple[int, int, int], + focal_length: float, + attitude: mavsdk.telemetry.EulerAngle, + height: Optional[float] = 1 +) -> npt.NDArray[np.float64]: + """ + Finds the intersection [X,Y] of a given pixel with the ground. + A camera with no rotation points in the +X direction and is centered at [0, 0, height]. + + Parameters + ---------- + pixel : Tuple[int, int] + The coordinates of the pixel in [Y, X] form + image_shape : Tuple[int, int, int] + The shape of the image (returned by image.shape when image is a numpy image array) + focal_length : float + The camera's focal length + attitude : mavsdk.telemetry.EulerAngle + The rotation of the drone given by MAVSDK + For testing purposes, you can generate an EulerAngle object as following: + mavsdk.telemetry.EulerAngle(roll_deg, pitch_deg, yaw_deg, 0) + With 0 as the input for the timestamp which is not needed. + height : Optional[float] + The height of the drone in any units. If a height is given, the units of the output will + be the units of the input. Defaults to 1. + Returns + ------- + intersect : npt.NDArray[np.float64] + The coordinates [X,Y] where the pixel's vector intersects with the ground. 
+ """ + + # Create the normalized vector representing the direction of the given pixel + vector: npt.NDArray[np.float64] = pixel_vector(pixel, image_shape, focal_length) + + # Extract the values from the EulerAngle object + cam_roll: float = np.deg2rad(attitude.roll_deg) + cam_pitch: float = np.deg2rad(attitude.pitch_deg) + cam_yaw: float = np.deg2rad(attitude.yaw_deg) + + vector = euler_rotate(vector, [cam_roll, cam_pitch, cam_yaw]) + + intersect: npt.NDArray[np.float64] = plane_collision(vector, height) + + return intersect + +# TODO: +# Specify radians for each From 5b5068fc5abcf08c58d291fb2a9cdeef6e0017db Mon Sep 17 00:00:00 2001 From: EnderDude67 Date: Sat, 21 May 2022 17:18:22 -0500 Subject: [PATCH 04/14] Added documentation --- vision/vector_deskew.py | 202 ++++++++++++++++++++++++++++------------ vision/vector_utils.py | 87 ++++++++--------- 2 files changed, 181 insertions(+), 108 deletions(-) diff --git a/vision/vector_deskew.py b/vision/vector_deskew.py index 2aff284..838f8f4 100644 --- a/vision/vector_deskew.py +++ b/vision/vector_deskew.py @@ -1,89 +1,165 @@ -import numpy as np -import numpy.typing as npt import cv2 -import mavsdk +import mypy +import pylint +import black import coordinate_lengths from vector_utils import * -def poly_area(coordinates): - x = coordinates[:, 0] - y = coordinates[:, 1] - i = np.arange(len(x)) - - # An implementation of the shoelace algorithm using numpy functions - area = np.abs(np.sum(x[i - 1] * y[i] - x[i] * y[i - 1]) * 0.5) - - return area - - -def pixel_distance(pixel1, pixel2, image_shape, focal_length, attitude, position): - height = position.relative_altitude_m - - intersect1 = pixel_intersect(pixel1, image_shape, focal_length, attitude, height) - intersect2 = pixel_intersect(pixel2, image_shape, focal_length, attitude, height) +def calculate_distance( + pixel1: Tuple[int, int], + pixel2: Tuple[int, int], + image_shape: Tuple[int, int, int], + focal_length: float, + rotation_deg: List[float], + altitude: float, +) 
-> float: + """ + Calculates the physical distance between two points on the ground represented by pixels + locations + + Parameters + ---------- + pixel1, pixel2: Tuple[int, int] + The two input pixels in [Y,X] form. The distance between them will be calculated + image_shape : Tuple[int, int, int] + The shape of the image (returned by `image.shape` when image is a numpy image array) + focal_length : float + The camera's focal length + rotation_deg : List[float] + The [roll, pitch, yaw] rotation in degrees + altitude: float + The altitude of the drone in any units. If an altitude is given, the units of the output + will be the units of the input. + Returns + ------- + distance : float + The distance between the two pixels. Units are the same units as `altitude` + """ + intersect1: npt.NDArray[np.float64] = pixel_intersect( + pixel1, image_shape, focal_length, rotation_deg, altitude + ) + intersect2: npt.NDArray[np.float64] = pixel_intersect( + pixel2, image_shape, focal_length, rotation_deg, altitude + ) # Calculate the distance between the two intersects - distance = np.linalg.norm(intersect1 - intersect2) - - distance = 3.28084 * distance # convert meters to feet? + distance: float = np.linalg.norm(intersect1 - intersect2) return distance -def pixel_coords(pixel, image_shape, focal_length, attitude, position): - height = position.relative_altitude_m - +def get_coordinates( + pixel: Tuple[int, int], + image_shape: Tuple[int, int, int], + focal_length: float, + rotation_deg: List[float], + drone_coordinates: List[float], + altitude_m: float, +) -> Tuple[float, float]: + """ + Calculates the coordinates of the given pixel + + Parameters + ---------- + pixel: Tuple[int, int] + The coordinates of the pixel in [Y, X] form + image_shape : Tuple[int, int, int] + The shape of the image (returned by `image.shape` when image is a numpy image array) + focal_length : float + The camera's focal length + rotation_deg: List[float] + The rotation of the drone/camera. 
The ROTATION_OFFSET in vector_utils.py will be applied + after. + drone_coordinates: List[float] + The coordinates of the drone in degrees of (latitude, longitude) + altitude_m: float + The altitude of the drone in meters + Returns + ------- + pixel_coordinates : Tuple[float, float] + The (latitude, longitude) coordinates of the pixel in degrees + """ # Calculate the latitude and longitude lengths (in meters) - latitude_length = coordinate_lengths.latitude_length(position.latitude_deg) - longitude_length = coordinate_lengths.longitude_length(position.latitude_deg) + latitude_length = coordinate_lengths.latitude_length(drone_coordinates[0]) + longitude_length = coordinate_lengths.longitude_length(drone_coordinates[0]) # Find the pixel's intersect with the ground to get the location relative to the drone - intersect = pixel_intersect(pixel, image_shape, focal_length, attitude, height) + intersect = pixel_intersect( + pixel, image_shape, focal_length, rotation_deg, altitude_m + ) # Invert the X axis so that the longitude is correct intersect[1] *= -1 # Convert the location to latitude and longitude and add it to the drone's coordinates - pixel_lat = position.latitude_deg + intersect[0] / latitude_length - pixel_lon = position.longitude_deg + intersect[1] / longitude_length + pixel_lat = drone_coordinates[0] + intersect[0] / latitude_length + pixel_lon = drone_coordinates[1] + intersect[1] / longitude_length return pixel_lat, pixel_lon -def vector_deskew(image, focal_length, attitude: mavsdk.telemetry.EulerAngle, area_scale=1, - interpolation=cv2.INTER_NEAREST): +def deskew( + image: npt.NDArray[np.uint8], + focal_length: float, + rotation_deg: List[float], + scale: Optional[float] = 1, + interpolation: Optional[int] = cv2.INTER_LINEAR, +) -> npt.NDArray[np.uint8]: + """ + Distorts an image to generate an overhead view of the photo. Parts of the image will be + completely black where the camera could not see. 
+ + Parameters + ---------- + image : npt.NDArray[np.uint8] + The input image to deskew. Aspect ratio should match the camera sensor + focal_length : float + The camera's focal length - used to generate the camera's fields of view + rotation_deg : List[float] + The [roll, pitch, yaw] rotation in degrees + scale: Optional[float] + Scales the resolution of the output. A value of 1 makes the area inside the camera view + equal to the original image. Defaults to 1. + interpolation: Optional[int] + The cv2 interpolation type to be used when deskewing. + Returns + ------- + deskewed_image : npt.NDArray[np.uint8] + The deskewed image - the image is flattened with black areas in the margins + """ orig_height, orig_width, _ = image.shape src_pts = np.float32( - [ - [0, 0], - [orig_width, 0], - [orig_width, orig_height], - [0, orig_height] - ] + [[0, 0], [orig_width, 0], [orig_width, orig_height], [0, orig_height]] ) # Convert XY to YX flipped = np.flip(src_pts, axis=1) - intersects = np.float32([pixel_intersect(point, image.shape, focal_length, attitude) - for point in flipped]) + intersects = np.float32( + [ + pixel_intersect(point, image.shape, focal_length, rotation_deg) + for point in flipped + ] + ) # Flip the endpoints over the X axis (top left is 0,0 for images) intersects[:, 1] *= -1 # Subtract the minimum on both axes so the minimum values on each axis are 0 - intersects = intersects - intersects.min(axis=0) + intersects -= intersects.min(axis=0) - # Find the area of the resulting shape - area = poly_area(intersects) + # Find the area using cv2 contour tools + area: float = cv2.contourArea(intersects) # Scale the output so the area of the important pixels is about the same as the starting image - target_area = image.shape[0] * image.shape[1] * area_scale - scale = np.sqrt(target_area / area) - dst_pts = intersects * scale + target_area = image.shape[0] * image.shape[1] * scale + intersect_scale = np.sqrt(target_area / area) + dst_pts = intersects * intersect_scale 
+ + dst_pts = np.round(dst_pts) matrix = cv2.getPerspectiveTransform(src_pts, dst_pts) @@ -91,30 +167,34 @@ def vector_deskew(image, focal_length, attitude: mavsdk.telemetry.EulerAngle, ar result_width = int(dst_pts[:, 0].max()) + 1 result = cv2.warpPerspective( - image, matrix, (result_width, result_height), flags=interpolation, - borderMode=cv2.BORDER_TRANSPARENT + image, + matrix, + (result_width, result_height), + flags=interpolation, + borderMode=cv2.BORDER_TRANSPARENT, ) return result def main(): - # coords = pixel_coords( - # [720/2, 1080/2], [720, 1080, 3], 10, - # mavsdk.telemetry.EulerAngle(0, -45, 180, 0), - # mavsdk.telemetry.Position(0, 0, None, 100000) - # ) - # - # print(np.around(coords, decimals=8)) + coords = get_coordinates( + (int(720 / 2), int(1080 / 2)), (720, 1080, 3), 10, [0, -45, 270], [0, 0], 10000 + ) + coords = np.around(coords, decimals=7) + print(coords) - image = cv2.imread("render2.png") - # print(type(image.shape)) - # image = np.dstack((image, np.full(image.shape[:2], 255))) - # image = feather_edges(image, 100) - image = vector_deskew(image, 10, mavsdk.telemetry.EulerAngle(0, -30, -30, 0)) + dist = calculate_distance((720, 1080), (0, 0), (720, 1080, 3), 10, [0, -45, -45], 5) + print(dist) - # image = image[:, :, :3] + image = cv2.imread("render1.png") + image = deskew(image, 10, [45, -45, -45]) + cv2.imwrite("output.png", image) if __name__ == "__main__": main() + +# TODO: +# Remove telemetry stuff +# Use camera offset diff --git a/vision/vector_utils.py b/vision/vector_utils.py index 0244dcb..c179110 100644 --- a/vision/vector_utils.py +++ b/vision/vector_utils.py @@ -8,6 +8,10 @@ SENSOR_WIDTH = 13.2 SENSOR_HEIGHT = 8.8 +# The rotation offset of the camera to the drone. 
The offset is applied in pixel_intersect +# Set to [0, -90, 0] when the camera is facing directly downwards +ROTATION_OFFSET = [0, 0, 0] + def get_fov(focal_length: float, sensor_size: float) -> float: """ @@ -30,8 +34,8 @@ def get_fov(focal_length: float, sensor_size: float) -> float: def focal_length_to_fovs( - focal_length: float, - sensor_size: Optional[Tuple[float, float]] = (SENSOR_WIDTH, SENSOR_HEIGHT) + focal_length: float, + sensor_size: Optional[Tuple[float, float]] = (SENSOR_WIDTH, SENSOR_HEIGHT), ) -> Tuple[float, float]: """ Converts a given focal length to the horizontal and vertical fields of view in radians @@ -51,33 +55,30 @@ def focal_length_to_fovs( return get_fov(focal_length, sensor_size[0]), get_fov(focal_length, sensor_size[1]) -# This gets the actual angle of the edge of the camera view; this can be derived using a square -# pyramid with height 1 -def edge_angle(horizontal_angle: float, vertical_angle: float) -> float: +def edge_angle(v_angle: float, h_angle: float) -> float: """ - Finds the angle needed to rotate + Finds the angle such that rotating by edge_angle on the Y axis then rotating by h_angle on + the Z axis gives a vector an angle v_angle with the Y axis + + Can be derived using a square pyramid of height 1 Parameters ---------- - horizontal_angle - vertical_angle - + v_angle : float + The vertical angle + h_angle : float + The horizontal angle Returns ------- - + edge_angle : float + The angle to rotate vertically """ - return np.arctan(np.tan(horizontal_angle) * np.cos(vertical_angle)) - -# Calculates the other angle if one FOV is known and the other isn't (DELETE THIS) -def find_angle(angle: float, aspect_ratio: float) -> float: - return 2 * np.arctan(aspect_ratio * np.tan(angle / 2)) + return np.arctan(np.tan(v_angle) * np.cos(h_angle)) def plane_collision( - ray_direction: npt.NDArray[np.float64], - height: float = 1, - epsilon: float = 1e-6 + ray_direction: npt.NDArray[np.float64], height: float = 1, epsilon: float = 1e-6 ) 
-> npt.NDArray[np.float64]: """ Returns the point where a ray intersects the XY plane @@ -105,7 +106,9 @@ def plane_collision( plane_normal: npt.NDArray[np.float64] = np.array([0, 0, 1]) plane_point: npt.NDArray[np.float64] = np.array([0, 0, 0]) # Any point on the plane - ray_point: npt.NDArray[np.float64] = np.array([0, 0, height]) # Origin point of the ray + ray_point: npt.NDArray[np.float64] = np.array( + [0, 0, height] + ) # Origin point of the ray ndotu: np.float64 = plane_normal.dot(ray_direction) @@ -123,8 +126,7 @@ def plane_collision( def euler_rotate( - vector: npt.NDArray[np.float64], - rotation: List[float] + vector: npt.NDArray[np.float64], rotation_deg: List[float] ) -> npt.NDArray[np.float64]: """ Rotates a vector based on a given roll, pitch, and yaw. @@ -136,7 +138,7 @@ def euler_rotate( ---------- vector: npt.NDArray[np.float64] A vector represented by an XYZ coordinate that will be rotated - rotation: List[float] + rotation_deg: List[float] The [roll, pitch, yaw] rotation in radians Returns ------- @@ -145,10 +147,10 @@ def euler_rotate( """ # Reverse the Y and Z rotation to match MAVSDK convention - rotation[1] *= -1 - rotation[2] *= -1 + rotation_deg[1] *= -1 + rotation_deg[2] *= -1 - return R.from_euler('xyz', rotation).apply(vector) + return R.from_euler("xyz", rotation_deg).apply(vector) def camera_vector(h_angle: float, v_angle: float) -> npt.NDArray[np.float64]: @@ -202,9 +204,7 @@ def pixel_angle(fov: float, ratio: float) -> float: def pixel_vector( - pixel: Tuple[int, int], - image_shape: Tuple[int, int, int], - focal_length: float + pixel: Tuple[int, int], image_shape: Tuple[int, int, int], focal_length: float ) -> npt.NDArray[np.float64]: """ Generates a vector representing the given pixel. 
@@ -232,16 +232,16 @@ def pixel_vector( return camera_vector( pixel_angle(fov_h, pixel[1] / image_shape[1]), - pixel_angle(fov_v, pixel[0] / image_shape[0]) + pixel_angle(fov_v, pixel[0] / image_shape[0]), ) def pixel_intersect( - pixel: Tuple[int, int], - image_shape: Tuple[int, int, int], - focal_length: float, - attitude: mavsdk.telemetry.EulerAngle, - height: Optional[float] = 1 + pixel: Tuple[int, int], + image_shape: Tuple[int, int, int], + focal_length: float, + rotation_deg: List[float], + height: Optional[float] = 1, ) -> npt.NDArray[np.float64]: """ Finds the intersection [X,Y] of a given pixel with the ground. @@ -255,11 +255,8 @@ def pixel_intersect( The shape of the image (returned by image.shape when image is a numpy image array) focal_length : float The camera's focal length - attitude : mavsdk.telemetry.EulerAngle - The rotation of the drone given by MAVSDK - For testing purposes, you can generate an EulerAngle object as following: - mavsdk.telemetry.EulerAngle(roll_deg, pitch_deg, yaw_deg, 0) - With 0 as the input for the timestamp which is not needed. + rotation_deg : List[float] + The [roll, pitch, yaw] rotation in degrees height : Optional[float] The height of the drone in any units. If a height is given, the units of the output will be the units of the input. Defaults to 1. 
@@ -272,16 +269,12 @@ def pixel_intersect( # Create the normalized vector representing the direction of the given pixel vector: npt.NDArray[np.float64] = pixel_vector(pixel, image_shape, focal_length) - # Extract the values from the EulerAngle object - cam_roll: float = np.deg2rad(attitude.roll_deg) - cam_pitch: float = np.deg2rad(attitude.pitch_deg) - cam_yaw: float = np.deg2rad(attitude.yaw_deg) + rotation = np.deg2rad(rotation_deg) - vector = euler_rotate(vector, [cam_roll, cam_pitch, cam_yaw]) + vector = euler_rotate(vector, rotation) + + vector = euler_rotate(vector, ROTATION_OFFSET) intersect: npt.NDArray[np.float64] = plane_collision(vector, height) return intersect - -# TODO: -# Specify radians for each From e09bbfee9b22583b32b86c1b184f258ccefccbd2 Mon Sep 17 00:00:00 2001 From: EnderDude67 Date: Sun, 22 May 2022 22:05:17 -0500 Subject: [PATCH 05/14] Rebased and started with hooks --- vision/vector_deskew.py | 39 +++------------------------------------ vision/vector_utils.py | 15 ++++++--------- 2 files changed, 9 insertions(+), 45 deletions(-) diff --git a/vision/vector_deskew.py b/vision/vector_deskew.py index 838f8f4..134c611 100644 --- a/vision/vector_deskew.py +++ b/vision/vector_deskew.py @@ -1,7 +1,4 @@ import cv2 -import mypy -import pylint -import black import coordinate_lengths from vector_utils import * @@ -86,9 +83,7 @@ def get_coordinates( longitude_length = coordinate_lengths.longitude_length(drone_coordinates[0]) # Find the pixel's intersect with the ground to get the location relative to the drone - intersect = pixel_intersect( - pixel, image_shape, focal_length, rotation_deg, altitude_m - ) + intersect = pixel_intersect(pixel, image_shape, focal_length, rotation_deg, altitude_m) # Invert the X axis so that the longitude is correct intersect[1] *= -1 @@ -131,18 +126,13 @@ def deskew( """ orig_height, orig_width, _ = image.shape - src_pts = np.float32( - [[0, 0], [orig_width, 0], [orig_width, orig_height], [0, orig_height]] - ) + src_pts = 
np.float32([[0, 0], [orig_width, 0], [orig_width, orig_height], [0, orig_height]]) # Convert XY to YX flipped = np.flip(src_pts, axis=1) intersects = np.float32( - [ - pixel_intersect(point, image.shape, focal_length, rotation_deg) - for point in flipped - ] + [pixel_intersect(point, image.shape, focal_length, rotation_deg) for point in flipped] ) # Flip the endpoints over the X axis (top left is 0,0 for images) @@ -175,26 +165,3 @@ def deskew( ) return result - - -def main(): - coords = get_coordinates( - (int(720 / 2), int(1080 / 2)), (720, 1080, 3), 10, [0, -45, 270], [0, 0], 10000 - ) - coords = np.around(coords, decimals=7) - print(coords) - - dist = calculate_distance((720, 1080), (0, 0), (720, 1080, 3), 10, [0, -45, -45], 5) - print(dist) - - image = cv2.imread("render1.png") - image = deskew(image, 10, [45, -45, -45]) - cv2.imwrite("output.png", image) - - -if __name__ == "__main__": - main() - -# TODO: -# Remove telemetry stuff -# Use camera offset diff --git a/vision/vector_utils.py b/vision/vector_utils.py index c179110..fbf273c 100644 --- a/vision/vector_utils.py +++ b/vision/vector_utils.py @@ -2,15 +2,14 @@ from typing import List, Tuple, Optional import numpy as np from scipy.spatial.transform import Rotation as R -import mavsdk # Sony RX100 vii sensor size SENSOR_WIDTH = 13.2 SENSOR_HEIGHT = 8.8 # The rotation offset of the camera to the drone. 
The offset is applied in pixel_intersect -# Set to [0, -90, 0] when the camera is facing directly downwards -ROTATION_OFFSET = [0, 0, 0] +# Set to [0.0, -90.0, 0.0] when the camera is facing directly downwards +ROTATION_OFFSET = [0.0, 0.0, 0.0] def get_fov(focal_length: float, sensor_size: float) -> float: @@ -106,9 +105,7 @@ def plane_collision( plane_normal: npt.NDArray[np.float64] = np.array([0, 0, 1]) plane_point: npt.NDArray[np.float64] = np.array([0, 0, 0]) # Any point on the plane - ray_point: npt.NDArray[np.float64] = np.array( - [0, 0, height] - ) # Origin point of the ray + ray_point: npt.NDArray[np.float64] = np.array([0, 0, height]) # Origin point of the ray ndotu: np.float64 = plane_normal.dot(ray_direction) @@ -117,7 +114,7 @@ def plane_collision( raise RuntimeError("no intersection or line is parallel to plane") # I didn't make this math but it works - w: npt.NDArray[np.int64] = ray_point - plane_point + w: npt.NDArray[np.float64] = ray_point - plane_point si: np.float64 = -plane_normal.dot(w) / ndotu psi: npt.NDArray[np.float64] = w + si * ray_direction + plane_point @@ -269,9 +266,9 @@ def pixel_intersect( # Create the normalized vector representing the direction of the given pixel vector: npt.NDArray[np.float64] = pixel_vector(pixel, image_shape, focal_length) - rotation = np.deg2rad(rotation_deg) + rotation_deg = np.deg2rad(rotation_deg) - vector = euler_rotate(vector, rotation) + vector = euler_rotate(vector, rotation_deg) vector = euler_rotate(vector, ROTATION_OFFSET) From 74fcc87550c8ca064be0685126056052ea0e4680 Mon Sep 17 00:00:00 2001 From: EnderDude67 Date: Mon, 23 May 2022 00:07:05 -0500 Subject: [PATCH 06/14] vector_utils hooks almost finished --- vision/vector_deskew.py | 2 +- vision/vector_utils.py | 69 +++++++++++++++-------------------------- 2 files changed, 26 insertions(+), 45 deletions(-) diff --git a/vision/vector_deskew.py b/vision/vector_deskew.py index 134c611..9487197 100644 --- a/vision/vector_deskew.py +++ 
b/vision/vector_deskew.py @@ -132,7 +132,7 @@ def deskew( flipped = np.flip(src_pts, axis=1) intersects = np.float32( - [pixel_intersect(point, image.shape, focal_length, rotation_deg) for point in flipped] + [pixel_intersect(point, image.shape, focal_length, rotation_deg, 1) for point in flipped] ) # Flip the endpoints over the X axis (top left is 0,0 for images) diff --git a/vision/vector_utils.py b/vision/vector_utils.py index fbf273c..aaa89b5 100644 --- a/vision/vector_utils.py +++ b/vision/vector_utils.py @@ -1,6 +1,6 @@ -import numpy.typing as npt -from typing import List, Tuple, Optional +from typing import List, Tuple import numpy as np +import numpy.typing as npt from scipy.spatial.transform import Rotation as R # Sony RX100 vii sensor size @@ -32,26 +32,24 @@ def get_fov(focal_length: float, sensor_size: float) -> float: return 2 * np.arctan(sensor_size / (2 * focal_length)) -def focal_length_to_fovs( - focal_length: float, - sensor_size: Optional[Tuple[float, float]] = (SENSOR_WIDTH, SENSOR_HEIGHT), -) -> Tuple[float, float]: +def focal_length_to_fovs(focal_length: float) -> Tuple[float, float]: """ Converts a given focal length to the horizontal and vertical fields of view in radians + Uses SENSOR_WIDTH and SENSOR_HEIGHT, which are set to 13.2 and 8.8 respectively, the size of + the sensor in the Sony RX100 vii + Parameters ---------- focal_length: float The focal length of the camera in millimeters - sensor_size: Optional[Tuple[float, float]] - The dimensions (width, height) of the sensor. 
Defaults to SENSOR_WIDTH and SENSOR_HEIGHT, - which are set to 13.2 and 8.8 respectively, the size of the sensor in the Sony RX100 vii Returns ------- fields_of_view : Tuple[float, float] - The horizontal and vertical fields of view in radians + The fields of view in radians + Format is [horizontal, vertical] """ - return get_fov(focal_length, sensor_size[0]), get_fov(focal_length, sensor_size[1]) + return get_fov(focal_length, SENSOR_WIDTH), get_fov(focal_length, SENSOR_HEIGHT) def edge_angle(v_angle: float, h_angle: float) -> float: @@ -77,7 +75,7 @@ def edge_angle(v_angle: float, h_angle: float) -> float: def plane_collision( - ray_direction: npt.NDArray[np.float64], height: float = 1, epsilon: float = 1e-6 + ray_direction: npt.NDArray[np.float64], height: float ) -> npt.NDArray[np.float64]: """ Returns the point where a ray intersects the XY plane @@ -88,38 +86,21 @@ def plane_collision( XYZ coordinates that represent the direction a ray faces from (0, 0, 0) height : float The Z coordinate for the starting height of the ray; can be any units - epsilon : float - Minimum value for the dot product of the ray direction and plane normal - Raises - ------ - RuntimeError: "no intersection or line is parallel to plane" - Occurs when the ray direction is facing away from or parallel to the plane + Returns + ------- + intersect : npt.NDArray[np.float64] + The ray's intersection with the plane in [X,Y] format - References - ---------- - http://rosettacode.org/wiki/Find_the_intersection_of_a_line_with_a_plane#Python """ + # Find the "time" at which the line intersects the plane + # Line is defined as ray_direction * time + origin. 
+ # Origin is the point at X, Y, Z = 0, 0, height - # Define the direction of the side face of the plane (In this case, facing upwards towards +Z) - plane_normal: npt.NDArray[np.float64] = np.array([0, 0, 1]) - - plane_point: npt.NDArray[np.float64] = np.array([0, 0, 0]) # Any point on the plane - ray_point: npt.NDArray[np.float64] = np.array([0, 0, height]) # Origin point of the ray - - ndotu: np.float64 = plane_normal.dot(ray_direction) - - # Checks to make sure the ray is pointing into the plane - if -ndotu < epsilon: - raise RuntimeError("no intersection or line is parallel to plane") + time: np.float64 = -height / ray_direction[2] + intersect: npt.NDArray[np.float64] = ray_direction[:2] * time - # I didn't make this math but it works - w: npt.NDArray[np.float64] = ray_point - plane_point - si: np.float64 = -plane_normal.dot(w) / ndotu - psi: npt.NDArray[np.float64] = w + si * ray_direction + plane_point - - psi = np.delete(psi, -1) # Remove the Z coordinate since it's always 0 - return psi + return intersect def euler_rotate( @@ -238,7 +219,7 @@ def pixel_intersect( image_shape: Tuple[int, int, int], focal_length: float, rotation_deg: List[float], - height: Optional[float] = 1, + height: float, ) -> npt.NDArray[np.float64]: """ Finds the intersection [X,Y] of a given pixel with the ground. @@ -254,9 +235,9 @@ def pixel_intersect( The camera's focal length rotation_deg : List[float] The [roll, pitch, yaw] rotation in degrees - height : Optional[float] - The height of the drone in any units. If a height is given, the units of the output will - be the units of the input. Defaults to 1. + height : float + The height that the image was taken at. The units of the output will be the units of the + input. 
Returns ------- intersect : npt.NDArray[np.float64] @@ -266,7 +247,7 @@ def pixel_intersect( # Create the normalized vector representing the direction of the given pixel vector: npt.NDArray[np.float64] = pixel_vector(pixel, image_shape, focal_length) - rotation_deg = np.deg2rad(rotation_deg) + rotation_deg = np.deg2rad(rotation_deg).tolist() vector = euler_rotate(vector, rotation_deg) From 0ff3da087773a3821755f3000819286e370bb7c4 Mon Sep 17 00:00:00 2001 From: EnderDude67 Date: Mon, 23 May 2022 23:51:53 -0500 Subject: [PATCH 07/14] Finished hooks, split vector_deskew, reordered functions in vector_utils, revised some documentation --- vision/Object Detection/deskew.py | 150 ---------------- vision/camera_distances.py | 102 +++++++++++ vision/coordinate_lengths.py | 22 ++- vision/deskew.py | 98 +++++++++++ vision/vector_deskew.py | 167 ------------------ vision/vector_utils.py | 283 +++++++++++++++--------------- 6 files changed, 360 insertions(+), 462 deletions(-) delete mode 100644 vision/Object Detection/deskew.py create mode 100644 vision/camera_distances.py create mode 100644 vision/deskew.py delete mode 100644 vision/vector_deskew.py diff --git a/vision/Object Detection/deskew.py b/vision/Object Detection/deskew.py deleted file mode 100644 index 07a54c2..0000000 --- a/vision/Object Detection/deskew.py +++ /dev/null @@ -1,150 +0,0 @@ -import cv2 -import numpy as np -import imutils - - -def resize(img, scale): - new_width = int(img.shape[1] * scale) - new_height = int(img.shape[0] * scale) - dimensions = (new_width, new_height) - return cv2.resize(img, dimensions) - - -# This gets the actual angle of the edge of the camera view; this can be derived using a square pyramid with height 1 -def edge_angle(horizontal_angle, vertical_angle): - return 2 * np.arctan(np.tan(horizontal_angle / 2) * np.cos(vertical_angle / 2)) - - -def calculate_length(horizontal_angle, vertical_angle): - return 2 * 1/(np.cos(vertical_angle / 2)) * np.tan(horizontal_angle / 2) - - -def 
partial_deskew(image, camera_data): - # Save the original size and aspect ratio for later - og_width = image.shape[1] - og_height = image.shape[0] - og_aspect_ratio = og_width / og_height - - camera_pitch = camera_data.get('camera_pitch') - - # If only one FOV is given, calculate the missing FOV using the aspect ratio and the existing FOV - if 'fov_horizontal' not in camera_data: - fov_vertical = camera_data.get('fov_vertical') - - fov_horizontal = 2 * np.arctan(og_aspect_ratio * np.tan(fov_vertical / 2)) - elif 'fov_vertical' not in camera_data: - fov_horizontal = camera_data.get('fov_horizontal') - - fov_vertical = 2 * np.arctan(np.tan(fov_horizontal / 2) / og_aspect_ratio) - else: - fov_horizontal = camera_data.get('fov_horizontal') - fov_vertical = camera_data.get('fov_vertical') - - # Calculate the actual top and bottom FOV of the camera - fov_edge = edge_angle(fov_horizontal, fov_vertical) - - # Calculate the ratio of the lengths of the top and bottom of the image - top = np.cos(camera_pitch + fov_vertical/2) - bottom = np.cos(camera_pitch - fov_vertical/2) - - # You can use either bottom/top or top/bottom depending on which way you want to transform - distance_ratio = bottom / top - - # Add padding to the image so you can distort outside the image - padding_size = int(((distance_ratio * image.shape[1]) - image.shape[1])/2) - image = cv2.copyMakeBorder(image, 0, 0, padding_size, padding_size, cv2.BORDER_CONSTANT, None, value=0) - - # Find the length of the top of the image and the distance between the top and the bottom - top_length = 2 * 1/(np.cos(camera_pitch + fov_vertical/2)) * np.tan(fov_edge/2) - center_length = np.tan(camera_pitch + fov_vertical/2) - np.tan(camera_pitch - fov_vertical/2) - - # Divide the lengths to get the aspect ratio of the final image - aspect_ratio = top_length / center_length - - # Set the dimensions to match the aspect ratio - # height = int(image.shape[1] / aspect_ratio) - # width = image.shape[1] - - height = image.shape[0] - 
width = int(image.shape[0] * aspect_ratio) - - # Define the input and output points for the transformation - input_pts = np.float32([[padding_size, 0], - [padding_size + og_width, 0], - [0, image.shape[0] - 1], - [image.shape[1], image.shape[0] - 1]]) - output_pts = np.float32([[0, 0], - [width - 1, 0], - [0, height - 1], - [width - 1, height - 1]]) - - # Use the points to get the transformation matrix and use the matrix to transform the image - matrix = cv2.getPerspectiveTransform(input_pts, output_pts) - result = cv2.warpPerspective(image, matrix, (width, height), flags=cv2.INTER_LINEAR) - - return result - - -def deskew(image, camera_data): - aspect_ratio = image.shape[1] / image.shape[0] - - camera_pitch = camera_data.get('camera_pitch') - camera_pitch = np.deg2rad(camera_pitch) - - # If only one FOV is given, calculate the missing FOV using the aspect ratio and the existing FOV - if 'fov_horizontal' not in camera_data: - fov_vertical = camera_data.get('fov_vertical') - fov_vertical = np.deg2rad(fov_vertical) - - fov_horizontal = 2 * np.arctan(aspect_ratio * np.tan(fov_vertical / 2)) - elif 'fov_vertical' not in camera_data: - fov_horizontal = camera_data.get('fov_horizontal') - fov_horizontal = np.deg2rad(fov_horizontal) - - fov_vertical = 2 * np.arctan(np.tan(fov_horizontal / 2) / aspect_ratio) - else: - fov_horizontal = camera_data.get('fov_horizontal') - fov_vertical = camera_data.get('fov_vertical') - - fov_horizontal = np.deg2rad(fov_horizontal) - fov_vertical = np.deg2rad(fov_vertical) - - # Do a partial deskew of the image to deskew based on the pitch, leaving only the roll - image = partial_deskew(image, {'camera_pitch': camera_pitch, 'fov_horizontal': fov_horizontal}) - - if 'camera_roll' in camera_data and camera_data.get('camera_roll') != 0: - camera_roll = camera_data.get('camera_roll') - camera_roll = np.deg2rad(camera_roll) - - # Rotate 90 degrees and add padding to create a new perspective view with the original roll as the pitch - image = 
imutils.rotate_bound(image, 90) - - # Calculate the padding needed to create the new perspective image - this is based on trig distances - top_distance = np.tan(camera_pitch + fov_vertical / 2) - bottom_distance = np.tan(camera_pitch - fov_vertical / 2) - image_length = top_distance - bottom_distance - conversion_rate = image.shape[1] / image_length - - bottom_distance = int(conversion_rate * bottom_distance) - - padding_size = 2 * bottom_distance + image.shape[1] - - flipped = cv2.flip(image, 1) - - image = cv2.copyMakeBorder(image, 0, 0, padding_size, 0, cv2.BORDER_CONSTANT, None, value=0) - - # image[0:flipped.shape[0], 0:flipped.shape[1]] = flipped - - # cv2.imwrite("output.png", image) - - image = partial_deskew(image, {'camera_pitch': camera_roll, 'fov_horizontal': camera_pitch * 2 + fov_horizontal}) - - image = imutils.rotate_bound(image, -90) - # image = image[0:int(image.shape[0] / 2) + 1] - - gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) - coords = cv2.findNonZero(gray) # Find all non-zero points (text) - x, y, w, h = cv2.boundingRect(coords) # Find minimum spanning bounding box - image = image[y:y + h, x:x + w] # Crop the image - - return image diff --git a/vision/camera_distances.py b/vision/camera_distances.py new file mode 100644 index 0000000..fc6f8fa --- /dev/null +++ b/vision/camera_distances.py @@ -0,0 +1,102 @@ +"""Functions for calculating locations of objects in an image""" + +from typing import Tuple, List + +import numpy as np +import numpy.typing as npt + +import coordinate_lengths +from vector_utils import pixel_intersect + + +def get_coordinates( + pixel: Tuple[int, int], + image_shape: Tuple[int, int, int], + focal_length: float, + rotation_deg: List[float], + drone_coordinates: List[float], + altitude_m: float, +) -> Tuple[float, float]: + """ + Calculates the coordinates of the given pixel + + Parameters + ---------- + pixel: Tuple[int, int] + The coordinates of the pixel in [Y, X] form + image_shape : Tuple[int, int, int] + The shape 
of the image (returned by `image.shape` when image is a numpy image array) + focal_length : float + The camera's focal length + rotation_deg: List[float] + The rotation of the drone/camera. The ROTATION_OFFSET in vector_utils.py will be applied + after. + drone_coordinates: List[float] + The coordinates of the drone in degrees of (latitude, longitude) + altitude_m: float + The altitude of the drone in meters + Returns + ------- + pixel_coordinates : Tuple[float, float] + The (latitude, longitude) coordinates of the pixel in degrees + """ + # Calculate the latitude and longitude lengths (in meters) + latitude_length: float = coordinate_lengths.latitude_length(drone_coordinates[0]) + longitude_length: float = coordinate_lengths.longitude_length(drone_coordinates[0]) + + # Find the pixel's intersect with the ground to get the location relative to the drone + intersect: npt.NDArray[np.float64] = pixel_intersect( + pixel, image_shape, focal_length, rotation_deg, altitude_m + ) + + # Invert the X axis so that the longitude is correct + intersect[1] *= -1 + + # Convert the location to latitude and longitude and add it to the drone's coordinates + pixel_lat = drone_coordinates[0] + intersect[0] / latitude_length + pixel_lon = drone_coordinates[1] + intersect[1] / longitude_length + + return pixel_lat, pixel_lon + + +def calculate_distance( + pixel1: Tuple[int, int], + pixel2: Tuple[int, int], + image_shape: Tuple[int, int, int], + focal_length: float, + rotation_deg: List[float], + altitude: float, +) -> float: + """ + Calculates the physical distance between two points on the ground represented by pixel + locations. Units of `distance` are the same as the units of `altitude` + + Parameters + ---------- + pixel1, pixel2: Tuple[int, int] + The two input pixel locations in [Y,X] form. 
The distance between them will be calculated + image_shape : Tuple[int, int, int] + The shape of the image (returned by `image.shape` when image is a numpy image array) + focal_length : float + The camera's focal length + rotation_deg : List[float] + The [roll, pitch, yaw] rotation in degrees + altitude: float + The altitude of the drone in any units. If an altitude is given, the units of the output + will be the units of the input. + Returns + ------- + distance : float + The distance between the two pixels. Units are the same units as `altitude` + """ + intersect1: npt.NDArray[np.float64] = pixel_intersect( + pixel1, image_shape, focal_length, rotation_deg, altitude + ) + intersect2: npt.NDArray[np.float64] = pixel_intersect( + pixel2, image_shape, focal_length, rotation_deg, altitude + ) + + # Calculate the distance between the two intersects + distance: float = float(np.linalg.norm(intersect1 - intersect2)) + + return distance diff --git a/vision/coordinate_lengths.py b/vision/coordinate_lengths.py index 737a5c3..01c78d5 100644 --- a/vision/coordinate_lengths.py +++ b/vision/coordinate_lengths.py @@ -1,3 +1,5 @@ +"""Functions for calculating coordinate degree lengths""" + import numpy as np @@ -10,6 +12,11 @@ def latitude_length(latitude: float) -> float: latitude : float The latitude in degrees + Returns + ------- + latitude_length + The length of a degree of latitude in meters at the given latitude + References ---------- https://en.wikipedia.org/wiki/Geographic_coordinate_system#Length_of_a_degree @@ -18,8 +25,12 @@ def latitude_length(latitude: float) -> float: # Convert to radians for trig functions latitude = np.deg2rad(latitude) - distance: float = 111132.92 - 559.82 * np.cos(2 * latitude) + 1.175 * np.cos(4 * latitude)\ + distance: float = ( + 111132.92 + - 559.82 * np.cos(2 * latitude) + + 1.175 * np.cos(4 * latitude) - 0.0023 * np.cos(6 * latitude) + ) return distance @@ -35,8 +46,8 @@ def longitude_length(latitude: float) -> float: Returns ------- - 
- + longitude_length + The length of a degree of longitude in meters at the given latitude References ---------- https://en.wikipedia.org/wiki/Geographic_coordinate_system#Length_of_a_degree @@ -45,7 +56,8 @@ def longitude_length(latitude: float) -> float: # Convert degrees to radians for trig functions latitude = np.deg2rad(latitude) - distance: float = 111412.84 * np.cos(latitude) - 93.5 * np.cos(3 * latitude) \ - + 0.118 * np.cos(5 * latitude) + distance: float = ( + 111412.84 * np.cos(latitude) - 93.5 * np.cos(3 * latitude) + 0.118 * np.cos(5 * latitude) + ) return distance diff --git a/vision/deskew.py b/vision/deskew.py new file mode 100644 index 0000000..71c0d88 --- /dev/null +++ b/vision/deskew.py @@ -0,0 +1,98 @@ +"""Distorts an image to generate an overhead view of the photo.""" + +from typing import List, Tuple, Optional + +import cv2 +import numpy as np +import numpy.typing as npt + +from vector_utils import pixel_intersect + + +def deskew( + image: npt.NDArray[np.uint8], + focal_length: float, + rotation_deg: List[float], + scale: float = 1, + interpolation: Optional[int] = cv2.INTER_LINEAR, +) -> Tuple[npt.NDArray[np.uint8], npt.NDArray[np.float64]]: + """ + Distorts an image to generate an overhead view of the photo. Parts of the image will be + completely black where the camera could not see. + + Parameters + ---------- + image : npt.NDArray[np.uint8] + The input image to deskew. Aspect ratio should match the camera sensor + focal_length : float + The camera's focal length - used to generate the camera's fields of view + rotation_deg : List[float] + The [roll, pitch, yaw] rotation in degrees + scale: Optional[float] + Scales the resolution of the output. A value of 1 makes the area inside the camera view + equal to the original image. Defaults to 1. + interpolation: Optional[int] + The cv2 interpolation type to be used when deskewing. 
+ Returns + ------- + (deskewed_image, corner_points) : Tuple[npt.NDArray[np.uint8], npt.NDArray[np.float64]] + deskewed_image : npt.NDArray[np.uint8] + The deskewed image - the image is flattened with black areas in the margins + corner_points : + The corner points of the result in the image. + Points are in order based on their location in the original image. + Format is: (top left, top right, bottom right, bottom left), or + 1--2 + | | + 4--3 + """ + orig_height: int + orig_width: int + orig_height, orig_width, _ = image.shape + + # Generate points in the format + # 1--2 + # | | + # 4--3 + + src_pts: npt.NDArray[np.float32] = np.array( + [[0, 0], [orig_width, 0], [orig_width, orig_height], [0, orig_height]], dtype=np.float32 + ) + intersects: npt.NDArray[np.float32] = np.array( + [ + pixel_intersect(point, image.shape, focal_length, rotation_deg, 1) + for point in np.flip(src_pts, axis=1) # use np.flip to convert XY to YX + ], + dtype=np.float32, + ) + + # Flip the endpoints over the X axis (top left is 0,0 for images) + intersects[:, 1] *= -1 + + # Subtract the minimum on both axes so the minimum values on each axis are 0 + intersects -= np.min(intersects, axis=0) + + # Find the area using cv2 contour tools + area: float = cv2.contourArea(intersects) + + # Scale the output so the area of the important pixels is about the same as the starting image + target_area: float = float(image.shape[0]) * (float(image.shape[1]) * scale) + intersect_scale: np.float64 = np.float64(np.sqrt(target_area / area)) + dst_pts: npt.NDArray[np.float64] = intersects * intersect_scale + + dst_pts = np.round(dst_pts) + + matrix: npt.NDArray[np.float64] = cv2.getPerspectiveTransform(src_pts, dst_pts) + + result_height: int = int(np.max(dst_pts[:, 1])) + 1 + result_width: int = int(np.max(dst_pts[:, 0])) + 1 + + result: npt.NDArray[np.uint8] = cv2.warpPerspective( + image, + matrix, + (result_width, result_height), + flags=interpolation, + borderMode=cv2.BORDER_TRANSPARENT, + ) + + return 
result, dst_pts.astype(np.int32) diff --git a/vision/vector_deskew.py b/vision/vector_deskew.py deleted file mode 100644 index 9487197..0000000 --- a/vision/vector_deskew.py +++ /dev/null @@ -1,167 +0,0 @@ -import cv2 - -import coordinate_lengths -from vector_utils import * - - -def calculate_distance( - pixel1: Tuple[int, int], - pixel2: Tuple[int, int], - image_shape: Tuple[int, int, int], - focal_length: float, - rotation_deg: List[float], - altitude: float, -) -> float: - """ - Calculates the physical distance between two points on the ground represented by pixels - locations - - Parameters - ---------- - pixel1, pixel2: Tuple[int, int] - The two input pixels in [Y,X] form. The distance between them will be calculated - image_shape : Tuple[int, int, int] - The shape of the image (returned by `image.shape` when image is a numpy image array) - focal_length : float - The camera's focal length - rotation_deg : List[float] - The [roll, pitch, yaw] rotation in degrees - altitude: float - The altitude of the drone in any units. If an altitude is given, the units of the output - will be the units of the input. - Returns - ------- - distance : float - The distance between the two pixels. 
Units are the same units as `altitude` - """ - intersect1: npt.NDArray[np.float64] = pixel_intersect( - pixel1, image_shape, focal_length, rotation_deg, altitude - ) - intersect2: npt.NDArray[np.float64] = pixel_intersect( - pixel2, image_shape, focal_length, rotation_deg, altitude - ) - - # Calculate the distance between the two intersects - distance: float = np.linalg.norm(intersect1 - intersect2) - - return distance - - -def get_coordinates( - pixel: Tuple[int, int], - image_shape: Tuple[int, int, int], - focal_length: float, - rotation_deg: List[float], - drone_coordinates: List[float], - altitude_m: float, -) -> Tuple[float, float]: - """ - Calculates the coordinates of the given pixel - - Parameters - ---------- - pixel: Tuple[int, int] - The coordinates of the pixel in [Y, X] form - image_shape : Tuple[int, int, int] - The shape of the image (returned by `image.shape` when image is a numpy image array) - focal_length : float - The camera's focal length - rotation_deg: List[float] - The rotation of the drone/camera. The ROTATION_OFFSET in vector_utils.py will be applied - after. 
- drone_coordinates: List[float] - The coordinates of the drone in degrees of (latitude, longitude) - altitude_m: float - The altitude of the drone in meters - Returns - ------- - pixel_coordinates : Tuple[float, float] - The (latitude, longitude) coordinates of the pixel in degrees - """ - # Calculate the latitude and longitude lengths (in meters) - latitude_length = coordinate_lengths.latitude_length(drone_coordinates[0]) - longitude_length = coordinate_lengths.longitude_length(drone_coordinates[0]) - - # Find the pixel's intersect with the ground to get the location relative to the drone - intersect = pixel_intersect(pixel, image_shape, focal_length, rotation_deg, altitude_m) - - # Invert the X axis so that the longitude is correct - intersect[1] *= -1 - - # Convert the location to latitude and longitude and add it to the drone's coordinates - pixel_lat = drone_coordinates[0] + intersect[0] / latitude_length - pixel_lon = drone_coordinates[1] + intersect[1] / longitude_length - - return pixel_lat, pixel_lon - - -def deskew( - image: npt.NDArray[np.uint8], - focal_length: float, - rotation_deg: List[float], - scale: Optional[float] = 1, - interpolation: Optional[int] = cv2.INTER_LINEAR, -) -> npt.NDArray[np.uint8]: - """ - Distorts an image to generate an overhead view of the photo. Parts of the image will be - completely black where the camera could not see. - - Parameters - ---------- - image : npt.NDArray[np.uint8] - The input image to deskew. Aspect ratio should match the camera sensor - focal_length : float - The camera's focal length - used to generate the camera's fields of view - rotation_deg : List[float] - The [roll, pitch, yaw] rotation in degrees - scale: Optional[float] - Scales the resolution of the output. A value of 1 makes the area inside the camera view - equal to the original image. Defaults to 1. - interpolation: Optional[int] - The cv2 interpolation type to be used when deskewing. 
- Returns - ------- - deskewed_image : npt.NDArray[np.uint8] - The deskewed image - the image is flattened with black areas in the margins - """ - orig_height, orig_width, _ = image.shape - - src_pts = np.float32([[0, 0], [orig_width, 0], [orig_width, orig_height], [0, orig_height]]) - - # Convert XY to YX - flipped = np.flip(src_pts, axis=1) - - intersects = np.float32( - [pixel_intersect(point, image.shape, focal_length, rotation_deg, 1) for point in flipped] - ) - - # Flip the endpoints over the X axis (top left is 0,0 for images) - intersects[:, 1] *= -1 - - # Subtract the minimum on both axes so the minimum values on each axis are 0 - intersects -= intersects.min(axis=0) - - # Find the area using cv2 contour tools - area: float = cv2.contourArea(intersects) - - # Scale the output so the area of the important pixels is about the same as the starting image - target_area = image.shape[0] * image.shape[1] * scale - intersect_scale = np.sqrt(target_area / area) - dst_pts = intersects * intersect_scale - - dst_pts = np.round(dst_pts) - - matrix = cv2.getPerspectiveTransform(src_pts, dst_pts) - - result_height = int(dst_pts[:, 1].max()) + 1 - result_width = int(dst_pts[:, 0].max()) + 1 - - result = cv2.warpPerspective( - image, - matrix, - (result_width, result_height), - flags=interpolation, - borderMode=cv2.BORDER_TRANSPARENT, - ) - - return result diff --git a/vision/vector_utils.py b/vision/vector_utils.py index aaa89b5..372e797 100644 --- a/vision/vector_utils.py +++ b/vision/vector_utils.py @@ -1,9 +1,12 @@ +"""Functions that use vectors to calculate camera intersections with the ground""" + from typing import List, Tuple -import numpy as np import numpy.typing as npt -from scipy.spatial.transform import Rotation as R -# Sony RX100 vii sensor size +import numpy as np +from scipy.spatial.transform import Rotation + +# Sony RX100 VII sensor size SENSOR_WIDTH = 13.2 SENSOR_HEIGHT = 8.8 @@ -12,66 +15,48 @@ ROTATION_OFFSET = [0.0, 0.0, 0.0] -def 
get_fov(focal_length: float, sensor_size: float) -> float: +def pixel_intersect( + pixel: Tuple[int, int], + image_shape: Tuple[int, ...], + focal_length: float, + rotation_deg: List[float], + height: float, +) -> npt.NDArray[np.float64]: """ - Converts a given focal length and sensor length to the corresponding field of view in radians + Finds the intersection [X,Y] of a given pixel with the ground relative to the camera. + A camera with no rotation points in the +X direction and is centered at [0, 0, height]. Parameters ---------- + pixel : Tuple[int, int] + The coordinates of the pixel in [Y, X] form + image_shape : Tuple[int, int, int] + The shape of the image (returned by image.shape when image is a numpy image array) focal_length : float - The focal length of the camera in millimeters - sensor_size: - The sensor size along one axis in millimeters - + The camera's focal length + rotation_deg : List[float] + The [roll, pitch, yaw] rotation in degrees + height : float + The height that the image was taken at. The units of the output will be the units of the + input. Returns ------- - fov : float - The field of view in radians - """ - - return 2 * np.arctan(sensor_size / (2 * focal_length)) - - -def focal_length_to_fovs(focal_length: float) -> Tuple[float, float]: + intersect : npt.NDArray[np.float64] + The coordinates [X,Y] where the pixel's vector intersects with the ground. 
""" - Converts a given focal length to the horizontal and vertical fields of view in radians - Uses SENSOR_WIDTH and SENSOR_HEIGHT, which are set to 13.2 and 8.8 respectively, the size of - the sensor in the Sony RX100 vii - - Parameters - ---------- - focal_length: float - The focal length of the camera in millimeters - Returns - ------- - fields_of_view : Tuple[float, float] - The fields of view in radians - Format is [horizontal, vertical] - """ - return get_fov(focal_length, SENSOR_WIDTH), get_fov(focal_length, SENSOR_HEIGHT) + # Create the normalized vector representing the direction of the given pixel + vector: npt.NDArray[np.float64] = pixel_vector(pixel, image_shape, focal_length) + rotation_deg = np.deg2rad(rotation_deg).tolist() -def edge_angle(v_angle: float, h_angle: float) -> float: - """ - Finds the angle such that rotating by edge_angle on the Y axis then rotating by h_angle on - the Z axis gives a vector an angle v_angle with the Y axis + vector = euler_rotate(vector, rotation_deg) - Can be derived using a square pyramid of height 1 + vector = euler_rotate(vector, ROTATION_OFFSET) - Parameters - ---------- - v_angle : float - The vertical angle - h_angle : float - The horizontal angle - Returns - ------- - edge_angle : float - The angle to rotate vertically - """ + intersect: npt.NDArray[np.float64] = plane_collision(vector, height) - return np.arctan(np.tan(v_angle) * np.cos(h_angle)) + return intersect def plane_collision( @@ -95,7 +80,7 @@ def plane_collision( """ # Find the "time" at which the line intersects the plane # Line is defined as ray_direction * time + origin. 
- # Origin is the point at X, Y, Z = 0, 0, height + # Origin is the point at X, Y, Z = (0, 0, height) time: np.float64 = -height / ray_direction[2] intersect: npt.NDArray[np.float64] = ray_direction[:2] * time @@ -103,58 +88,37 @@ def plane_collision( return intersect -def euler_rotate( - vector: npt.NDArray[np.float64], rotation_deg: List[float] +def pixel_vector( + pixel: Tuple[int, int], image_shape: Tuple[int, ...], focal_length: float ) -> npt.NDArray[np.float64]: """ - Rotates a vector based on a given roll, pitch, and yaw. - - Follows the MAVSDK.EulerAngle convention - positive roll is banking to the right, positive - pitch is pitching nose up, positive yaw is clock-wise seen from above. + Generates a vector representing the given pixel. + Pixels are in row-major form [Y, X] to match numpy indexing. Parameters ---------- - vector: npt.NDArray[np.float64] - A vector represented by an XYZ coordinate that will be rotated - rotation_deg: List[float] - The [roll, pitch, yaw] rotation in radians - Returns - ------- - rotated_vector : npt.NDArray[np.float64] - The vector which has been rotated - """ - - # Reverse the Y and Z rotation to match MAVSDK convention - rotation_deg[1] *= -1 - rotation_deg[2] *= -1 - - return R.from_euler("xyz", rotation_deg).apply(vector) - - -def camera_vector(h_angle: float, v_angle: float) -> npt.NDArray[np.float64]: - """ - Generates a vector with an angle h_angle with the horizontal and an angle v_angle with the - vertical. - - Using camera fovs will generate a vector that represents the corner of the camera's view. 
+ pixel : Tuple[int, int] + The coordinates of the pixel in [Y, X] form + image_shape : Tuple[int, int, int] + The shape of the image (returned by image.shape when image is a numpy image array) + focal_length : float + The camera's focal length - used to generate the camera's fields of view - Parameters - ---------- - h_angle : float - The angle in radians to rotate horizontally - v_angle : float - The angle in radians to rotate vertically Returns ------- - camera_vector : npt.NDArray[np.float64] - The vector which represents a given location in an image + pixel_vector : npt.NDArray[np.float64] + The vector that represents the direction of the given pixel """ - # Calculate the vertical rotation needed for the final vector to have the desired direction - edge: float = edge_angle(v_angle, h_angle) + # Find the FOVs using the focal length + fov_h: float + fov_v: float + fov_h, fov_v = focal_length_to_fovs(focal_length) - vector: npt.NDArray[np.float64] = np.array([1, 0, 0], dtype=np.float64) - return euler_rotate(vector, [0, edge, -h_angle]) + return camera_vector( + pixel_angle(fov_h, pixel[1] / image_shape[1]), + pixel_angle(fov_v, pixel[0] / image_shape[0]), + ) def pixel_angle(fov: float, ratio: float) -> float: @@ -181,78 +145,117 @@ def pixel_angle(fov: float, ratio: float) -> float: return np.arctan(np.tan(fov / 2) * (1 - 2 * ratio)) -def pixel_vector( - pixel: Tuple[int, int], image_shape: Tuple[int, int, int], focal_length: float -) -> npt.NDArray[np.float64]: +def focal_length_to_fovs(focal_length: float) -> Tuple[float, float]: """ - Generates a vector representing the given pixel. - Pixels are in row-major form [Y, X] to match numpy indexing. 
+ Converts a given focal length to the horizontal and vertical fields of view in radians + + Uses SENSOR_WIDTH and SENSOR_HEIGHT, which are set to 13.2 and 8.8 respectively, the size of + the sensor in the Sony RX100 vii + + Parameters + ---------- + focal_length: float + The focal length of the camera in millimeters + Returns + ------- + fields_of_view : Tuple[float, float] + The fields of view in radians + Format is [horizontal, vertical] + """ + return get_fov(focal_length, SENSOR_WIDTH), get_fov(focal_length, SENSOR_HEIGHT) + + +def get_fov(focal_length: float, sensor_size: float) -> float: + """ + Converts a given focal length and sensor length to the corresponding field of view in radians Parameters ---------- - pixel : Tuple[int, int] - The coordinates of the pixel in [Y, X] form - image_shape : Tuple[int, int, int] - The shape of the image (returned by image.shape when image is a numpy image array) focal_length : float - The camera's focal length - used to generate the camera's fields of view + The focal length of the camera in millimeters + sensor_size: + The sensor size along one axis in millimeters Returns ------- - pixel_vector : npt.NDArray[np.float64] - The vector that represents the direction of the given pixel + fov : float + The field of view in radians """ - # Find the FOVs using the focal length - fov_h: float - fov_v: float - fov_h, fov_v = focal_length_to_fovs(focal_length) + return 2 * np.arctan(sensor_size / (2 * focal_length)) - return camera_vector( - pixel_angle(fov_h, pixel[1] / image_shape[1]), - pixel_angle(fov_v, pixel[0] / image_shape[0]), - ) +def camera_vector(h_angle: float, v_angle: float) -> npt.NDArray[np.float64]: + """ + Generates a vector with an angle h_angle with the horizontal and an angle v_angle with the + vertical. 
-def pixel_intersect( - pixel: Tuple[int, int], - image_shape: Tuple[int, int, int], - focal_length: float, - rotation_deg: List[float], - height: float, -) -> npt.NDArray[np.float64]: + Using camera fovs will generate a vector that represents the corner of the camera's view. + + Parameters + ---------- + h_angle : float + The angle in radians to rotate horizontally + v_angle : float + The angle in radians to rotate vertically + Returns + ------- + camera_vector : npt.NDArray[np.float64] + The vector which represents a given location in an image """ - Finds the intersection [X,Y] of a given pixel with the ground. - A camera with no rotation points in the +X direction and is centered at [0, 0, height]. + + # Calculate the vertical rotation needed for the final vector to have the desired direction + edge: float = edge_angle(v_angle, h_angle) + + vector: npt.NDArray[np.float64] = np.array([1, 0, 0], dtype=np.float64) + return euler_rotate(vector, [0, edge, -h_angle]) + + +def edge_angle(v_angle: float, h_angle: float) -> float: + """ + Finds the angle such that rotating by edge_angle on the Y axis then rotating by h_angle on + the Z axis gives a vector an angle v_angle with the Y axis + + Can be derived using a square pyramid of height 1 Parameters ---------- - pixel : Tuple[int, int] - The coordinates of the pixel in [Y, X] form - image_shape : Tuple[int, int, int] - The shape of the image (returned by image.shape when image is a numpy image array) - focal_length : float - The camera's focal length - rotation_deg : List[float] - The [roll, pitch, yaw] rotation in degrees - height : float - The height that the image was taken at. The units of the output will be the units of the - input. + v_angle : float + The vertical angle + h_angle : float + The horizontal angle Returns ------- - intersect : npt.NDArray[np.float64] - The coordinates [X,Y] where the pixel's vector intersects with the ground. 
+ edge_angle : float + The angle to rotate vertically """ - # Create the normalized vector representing the direction of the given pixel - vector: npt.NDArray[np.float64] = pixel_vector(pixel, image_shape, focal_length) + return np.arctan(np.tan(v_angle) * np.cos(h_angle)) - rotation_deg = np.deg2rad(rotation_deg).tolist() - vector = euler_rotate(vector, rotation_deg) +def euler_rotate( + vector: npt.NDArray[np.float64], rotation_deg: List[float] +) -> npt.NDArray[np.float64]: + """ + Rotates a vector based on a given roll, pitch, and yaw. - vector = euler_rotate(vector, ROTATION_OFFSET) + Follows the MAVSDK.EulerAngle convention - positive roll is banking to the right, positive + pitch is pitching nose up, positive yaw is clock-wise seen from above. - intersect: npt.NDArray[np.float64] = plane_collision(vector, height) + Parameters + ---------- + vector: npt.NDArray[np.float64] + A vector represented by an XYZ coordinate that will be rotated + rotation_deg: List[float] + The [roll, pitch, yaw] rotation in radians + Returns + ------- + rotated_vector : npt.NDArray[np.float64] + The vector which has been rotated + """ - return intersect + # Reverse the Y and Z rotation to match MAVSDK convention + rotation_deg[1] *= -1 + rotation_deg[2] *= -1 + + return Rotation.from_euler("xyz", rotation_deg).apply(vector) From 0dab8decee3bc98f19b84598472c1811e0902327 Mon Sep 17 00:00:00 2001 From: EnderDude67 Date: Mon, 23 May 2022 23:53:40 -0500 Subject: [PATCH 08/14] Removed vector_mapping --- vision/vector_mapping.py | 29 ----------------------------- 1 file changed, 29 deletions(-) delete mode 100644 vision/vector_mapping.py diff --git a/vision/vector_mapping.py b/vision/vector_mapping.py deleted file mode 100644 index 74161af..0000000 --- a/vision/vector_mapping.py +++ /dev/null @@ -1,29 +0,0 @@ -import numpy as np -import cv2 - - -def feather_edges(image, blur_amount): - mask = np.full(np.array(image.shape[:2]) - blur_amount, 255) - mask = cv2.copyMakeBorder(mask, - 
int(blur_amount / 2), - round(blur_amount / 2), - int(blur_amount / 2), - round(blur_amount / 2), - cv2.BORDER_CONSTANT, - value=[0, 0, 0, 0]) - mask = cv2.blur(mask, [blur_amount, blur_amount]) - - image[:, :, 3] = mask - - return image - - -def alpha_over(foreground, background): - back_alpha = np.expand_dims(background[:, :, 3] / 255, axis=2) - fore_alpha = np.expand_dims(foreground[:, :, 3] / 255, axis=2) - - foreground[:, :, 3] = np.max((background[:, :, 3], foreground[:, :, 3])) - - background = fore_alpha * foreground + back_alpha * background * (1 - fore_alpha) - - return background \ No newline at end of file From 80eb6dbed8836278ac0dc70e27f306695eff8530 Mon Sep 17 00:00:00 2001 From: EnderDude67 Date: Tue, 24 May 2022 01:15:20 -0500 Subject: [PATCH 09/14] Moved files to /common --- vision/camera_distances.py | 102 -------------- vision/coordinate_lengths.py | 63 --------- vision/deskew.py | 98 ------------- vision/vector_utils.py | 261 ----------------------------------- 4 files changed, 524 deletions(-) delete mode 100644 vision/camera_distances.py delete mode 100644 vision/coordinate_lengths.py delete mode 100644 vision/deskew.py delete mode 100644 vision/vector_utils.py diff --git a/vision/camera_distances.py b/vision/camera_distances.py deleted file mode 100644 index fc6f8fa..0000000 --- a/vision/camera_distances.py +++ /dev/null @@ -1,102 +0,0 @@ -"""Functions for calculating locations of objects in an image""" - -from typing import Tuple, List - -import numpy as np -import numpy.typing as npt - -import coordinate_lengths -from vector_utils import pixel_intersect - - -def get_coordinates( - pixel: Tuple[int, int], - image_shape: Tuple[int, int, int], - focal_length: float, - rotation_deg: List[float], - drone_coordinates: List[float], - altitude_m: float, -) -> Tuple[float, float]: - """ - Calculates the coordinates of the given pixel - - Parameters - ---------- - pixel: Tuple[int, int] - The coordinates of the pixel in [Y, X] form - image_shape : 
Tuple[int, int, int] - The shape of the image (returned by `image.shape` when image is a numpy image array) - focal_length : float - The camera's focal length - rotation_deg: List[float] - The rotation of the drone/camera. The ROTATION_OFFSET in vector_utils.py will be applied - after. - drone_coordinates: List[float] - The coordinates of the drone in degrees of (latitude, longitude) - altitude_m: float - The altitude of the drone in meters - Returns - ------- - pixel_coordinates : Tuple[float, float] - The (latitude, longitude) coordinates of the pixel in degrees - """ - # Calculate the latitude and longitude lengths (in meters) - latitude_length: float = coordinate_lengths.latitude_length(drone_coordinates[0]) - longitude_length: float = coordinate_lengths.longitude_length(drone_coordinates[0]) - - # Find the pixel's intersect with the ground to get the location relative to the drone - intersect: npt.NDArray[np.float64] = pixel_intersect( - pixel, image_shape, focal_length, rotation_deg, altitude_m - ) - - # Invert the X axis so that the longitude is correct - intersect[1] *= -1 - - # Convert the location to latitude and longitude and add it to the drone's coordinates - pixel_lat = drone_coordinates[0] + intersect[0] / latitude_length - pixel_lon = drone_coordinates[1] + intersect[1] / longitude_length - - return pixel_lat, pixel_lon - - -def calculate_distance( - pixel1: Tuple[int, int], - pixel2: Tuple[int, int], - image_shape: Tuple[int, int, int], - focal_length: float, - rotation_deg: List[float], - altitude: float, -) -> float: - """ - Calculates the physical distance between two points on the ground represented by pixel - locations. Units of `distance` are the same as the units of `altitude` - - Parameters - ---------- - pixel1, pixel2: Tuple[int, int] - The two input pixel locations in [Y,X] form. 
The distance between them will be calculated - image_shape : Tuple[int, int, int] - The shape of the image (returned by `image.shape` when image is a numpy image array) - focal_length : float - The camera's focal length - rotation_deg : List[float] - The [roll, pitch, yaw] rotation in degrees - altitude: float - The altitude of the drone in any units. If an altitude is given, the units of the output - will be the units of the input. - Returns - ------- - distance : float - The distance between the two pixels. Units are the same units as `altitude` - """ - intersect1: npt.NDArray[np.float64] = pixel_intersect( - pixel1, image_shape, focal_length, rotation_deg, altitude - ) - intersect2: npt.NDArray[np.float64] = pixel_intersect( - pixel2, image_shape, focal_length, rotation_deg, altitude - ) - - # Calculate the distance between the two intersects - distance: float = float(np.linalg.norm(intersect1 - intersect2)) - - return distance diff --git a/vision/coordinate_lengths.py b/vision/coordinate_lengths.py deleted file mode 100644 index 01c78d5..0000000 --- a/vision/coordinate_lengths.py +++ /dev/null @@ -1,63 +0,0 @@ -"""Functions for calculating coordinate degree lengths""" - -import numpy as np - - -def latitude_length(latitude: float) -> float: - """ - Returns the distance in meters of one degree of latitude at a particular latitude - - Parameter - --------- - latitude : float - The latitude in degrees - - Returns - ------- - latitude_length - The length of a degree of latitude in meters at the given latitude - - References - ---------- - https://en.wikipedia.org/wiki/Geographic_coordinate_system#Length_of_a_degree - """ - - # Convert to radians for trig functions - latitude = np.deg2rad(latitude) - - distance: float = ( - 111132.92 - - 559.82 * np.cos(2 * latitude) - + 1.175 * np.cos(4 * latitude) - - 0.0023 * np.cos(6 * latitude) - ) - - return distance - - -def longitude_length(latitude: float) -> float: - """ - Calculates the distance in meters of one degree
of longitude at the given latitude - - Parameter - --------- - latitude : float - The latitude in degrees - - Returns - ------- - longitude_length - The length of a degree of longitude in meters at the given latitude - References - ---------- - https://en.wikipedia.org/wiki/Geographic_coordinate_system#Length_of_a_degree - """ - - # Convert degrees to radians for trig functions - latitude = np.deg2rad(latitude) - - distance: float = ( - 111412.84 * np.cos(latitude) - 93.5 * np.cos(3 * latitude) + 0.118 * np.cos(5 * latitude) - ) - - return distance diff --git a/vision/deskew.py b/vision/deskew.py deleted file mode 100644 index 71c0d88..0000000 --- a/vision/deskew.py +++ /dev/null @@ -1,98 +0,0 @@ -"""Distorts an image to generate an overhead view of the photo.""" - -from typing import List, Tuple, Optional - -import cv2 -import numpy as np -import numpy.typing as npt - -from vector_utils import pixel_intersect - - -def deskew( - image: npt.NDArray[np.uint8], - focal_length: float, - rotation_deg: List[float], - scale: float = 1, - interpolation: Optional[int] = cv2.INTER_LINEAR, -) -> Tuple[npt.NDArray[np.uint8], npt.NDArray[np.float64]]: - """ - Distorts an image to generate an overhead view of the photo. Parts of the image will be - completely black where the camera could not see. - - Parameters - ---------- - image : npt.NDArray[np.uint8] - The input image to deskew. Aspect ratio should match the camera sensor - focal_length : float - The camera's focal length - used to generate the camera's fields of view - rotation_deg : List[float] - The [roll, pitch, yaw] rotation in degrees - scale: Optional[float] - Scales the resolution of the output. A value of 1 makes the area inside the camera view - equal to the original image. Defaults to 1. - interpolation: Optional[int] - The cv2 interpolation type to be used when deskewing.
- Returns - ------- - (deskewed_image, corner_points) : Tuple[npt.NDArray[np.uint8], npt.NDArray[np.float64]] - deskewed_image : npt.NDArray[np.uint8] - The deskewed image - the image is flattened with black areas in the margins - corner_points : - The corner points of the result in the image. - Points are in order based on their location in the original image. - Format is: (top left, top right, bottom right, bottom left), or - 1--2 - | | - 4--3 - """ - orig_height: int - orig_width: int - orig_height, orig_width, _ = image.shape - - # Generate points in the format - # 1--2 - # | | - # 4--3 - - src_pts: npt.NDArray[np.float32] = np.array( - [[0, 0], [orig_width, 0], [orig_width, orig_height], [0, orig_height]], dtype=np.float32 - ) - intersects: npt.NDArray[np.float32] = np.array( - [ - pixel_intersect(point, image.shape, focal_length, rotation_deg, 1) - for point in np.flip(src_pts, axis=1) # use np.flip to convert XY to YX - ], - dtype=np.float32, - ) - - # Flip the endpoints over the X axis (top left is 0,0 for images) - intersects[:, 1] *= -1 - - # Subtract the minimum on both axes so the minimum values on each axis are 0 - intersects -= np.min(intersects, axis=0) - - # Find the area using cv2 contour tools - area: float = cv2.contourArea(intersects) - - # Scale the output so the area of the important pixels is about the same as the starting image - target_area: float = float(image.shape[0]) * (float(image.shape[1]) * scale) - intersect_scale: np.float64 = np.float64(np.sqrt(target_area / area)) - dst_pts: npt.NDArray[np.float64] = intersects * intersect_scale - - dst_pts = np.round(dst_pts) - - matrix: npt.NDArray[np.float64] = cv2.getPerspectiveTransform(src_pts, dst_pts) - - result_height: int = int(np.max(dst_pts[:, 1])) + 1 - result_width: int = int(np.max(dst_pts[:, 0])) + 1 - - result: npt.NDArray[np.uint8] = cv2.warpPerspective( - image, - matrix, - (result_width, result_height), - flags=interpolation, - borderMode=cv2.BORDER_TRANSPARENT, - ) - - return 
result, dst_pts.astype(np.int32) diff --git a/vision/vector_utils.py b/vision/vector_utils.py deleted file mode 100644 index 372e797..0000000 --- a/vision/vector_utils.py +++ /dev/null @@ -1,261 +0,0 @@ -"""Functions that use vectors to calculate camera intersections with the ground""" - -from typing import List, Tuple -import numpy.typing as npt - -import numpy as np -from scipy.spatial.transform import Rotation - -# Sony RX100 VII sensor size -SENSOR_WIDTH = 13.2 -SENSOR_HEIGHT = 8.8 - -# The rotation offset of the camera to the drone. The offset is applied in pixel_intersect -# Set to [0.0, -90.0, 0.0] when the camera is facing directly downwards -ROTATION_OFFSET = [0.0, 0.0, 0.0] - - -def pixel_intersect( - pixel: Tuple[int, int], - image_shape: Tuple[int, ...], - focal_length: float, - rotation_deg: List[float], - height: float, -) -> npt.NDArray[np.float64]: - """ - Finds the intersection [X,Y] of a given pixel with the ground relative to the camera. - A camera with no rotation points in the +X direction and is centered at [0, 0, height]. - - Parameters - ---------- - pixel : Tuple[int, int] - The coordinates of the pixel in [Y, X] form - image_shape : Tuple[int, int, int] - The shape of the image (returned by image.shape when image is a numpy image array) - focal_length : float - The camera's focal length - rotation_deg : List[float] - The [roll, pitch, yaw] rotation in degrees - height : float - The height that the image was taken at. The units of the output will be the units of the - input. - Returns - ------- - intersect : npt.NDArray[np.float64] - The coordinates [X,Y] where the pixel's vector intersects with the ground. 
- """ - - # Create the normalized vector representing the direction of the given pixel - vector: npt.NDArray[np.float64] = pixel_vector(pixel, image_shape, focal_length) - - rotation_deg = np.deg2rad(rotation_deg).tolist() - - vector = euler_rotate(vector, rotation_deg) - - vector = euler_rotate(vector, ROTATION_OFFSET) - - intersect: npt.NDArray[np.float64] = plane_collision(vector, height) - - return intersect - - -def plane_collision( - ray_direction: npt.NDArray[np.float64], height: float -) -> npt.NDArray[np.float64]: - """ - Returns the point where a ray intersects the XY plane - - Parameters - ---------- - ray_direction : npt.NDArray[np.float64] - XYZ coordinates that represent the direction a ray faces from (0, 0, 0) - height : float - The Z coordinate for the starting height of the ray; can be any units - - Returns - ------- - intersect : npt.NDArray[np.float64] - The ray's intersection with the plane in [X,Y] format - - """ - # Find the "time" at which the line intersects the plane - # Line is defined as ray_direction * time + origin. - # Origin is the point at X, Y, Z = (0, 0, height) - - time: np.float64 = -height / ray_direction[2] - intersect: npt.NDArray[np.float64] = ray_direction[:2] * time - - return intersect - - -def pixel_vector( - pixel: Tuple[int, int], image_shape: Tuple[int, ...], focal_length: float -) -> npt.NDArray[np.float64]: - """ - Generates a vector representing the given pixel. - Pixels are in row-major form [Y, X] to match numpy indexing. 
- - Parameters - ---------- - pixel : Tuple[int, int] - The coordinates of the pixel in [Y, X] form - image_shape : Tuple[int, int, int] - The shape of the image (returned by image.shape when image is a numpy image array) - focal_length : float - The camera's focal length - used to generate the camera's fields of view - - Returns - ------- - pixel_vector : npt.NDArray[np.float64] - The vector that represents the direction of the given pixel - """ - - # Find the FOVs using the focal length - fov_h: float - fov_v: float - fov_h, fov_v = focal_length_to_fovs(focal_length) - - return camera_vector( - pixel_angle(fov_h, pixel[1] / image_shape[1]), - pixel_angle(fov_v, pixel[0] / image_shape[0]), - ) - - -def pixel_angle(fov: float, ratio: float) -> float: - """ - Calculates a pixel's angle from the center of the camera on a single axis. Analogous to the - pixel's "fov" - - Only one component of the pixel is used here, call this function for each X and Y - - Parameters - ---------- - fov : float - The field of view of the camera in radians along a given axis - ratio : float - The pixel's position as a ratio of the coordinate to the length of the image - Example: For an image that is 1080 pixels wide, a pixel at position 270 would have a - ratio of 0.25 - - Returns - ------- - angle : float - The pixel's angle from the center of the camera along a single axis - """ - return np.arctan(np.tan(fov / 2) * (1 - 2 * ratio)) - - -def focal_length_to_fovs(focal_length: float) -> Tuple[float, float]: - """ - Converts a given focal length to the horizontal and vertical fields of view in radians - - Uses SENSOR_WIDTH and SENSOR_HEIGHT, which are set to 13.2 and 8.8 respectively, the size of - the sensor in the Sony RX100 vii - - Parameters - ---------- - focal_length: float - The focal length of the camera in millimeters - Returns - ------- - fields_of_view : Tuple[float, float] - The fields of view in radians - Format is [horizontal, vertical] - """ - return get_fov(focal_length,
SENSOR_WIDTH), get_fov(focal_length, SENSOR_HEIGHT) - - -def get_fov(focal_length: float, sensor_size: float) -> float: - """ - Converts a given focal length and sensor length to the corresponding field of view in radians - - Parameters - ---------- - focal_length : float - The focal length of the camera in millimeters - sensor_size: - The sensor size along one axis in millimeters - - Returns - ------- - fov : float - The field of view in radians - """ - - return 2 * np.arctan(sensor_size / (2 * focal_length)) - - -def camera_vector(h_angle: float, v_angle: float) -> npt.NDArray[np.float64]: - """ - Generates a vector with an angle h_angle with the horizontal and an angle v_angle with the - vertical. - - Using camera fovs will generate a vector that represents the corner of the camera's view. - - Parameters - ---------- - h_angle : float - The angle in radians to rotate horizontally - v_angle : float - The angle in radians to rotate vertically - Returns - ------- - camera_vector : npt.NDArray[np.float64] - The vector which represents a given location in an image - """ - - # Calculate the vertical rotation needed for the final vector to have the desired direction - edge: float = edge_angle(v_angle, h_angle) - - vector: npt.NDArray[np.float64] = np.array([1, 0, 0], dtype=np.float64) - return euler_rotate(vector, [0, edge, -h_angle]) - - -def edge_angle(v_angle: float, h_angle: float) -> float: - """ - Finds the angle such that rotating by edge_angle on the Y axis then rotating by h_angle on - the Z axis gives a vector an angle v_angle with the Y axis - - Can be derived using a square pyramid of height 1 - - Parameters - ---------- - v_angle : float - The vertical angle - h_angle : float - The horizontal angle - Returns - ------- - edge_angle : float - The angle to rotate vertically - """ - - return np.arctan(np.tan(v_angle) * np.cos(h_angle)) - - -def euler_rotate( - vector: npt.NDArray[np.float64], rotation_deg: List[float] -) -> npt.NDArray[np.float64]: - """ - 
Rotates a vector based on a given roll, pitch, and yaw. - - Follows the MAVSDK.EulerAngle convention - positive roll is banking to the right, positive - pitch is pitching nose up, positive yaw is clock-wise seen from above. - - Parameters - ---------- - vector: npt.NDArray[np.float64] - A vector represented by an XYZ coordinate that will be rotated - rotation_deg: List[float] - The [roll, pitch, yaw] rotation in radians - Returns - ------- - rotated_vector : npt.NDArray[np.float64] - The vector which has been rotated - """ - - # Reverse the Y and Z rotation to match MAVSDK convention - rotation_deg[1] *= -1 - rotation_deg[2] *= -1 - - return Rotation.from_euler("xyz", rotation_deg).apply(vector) From 3ceb95a0028d2ec85a111f392f6108d9a77f9508 Mon Sep 17 00:00:00 2001 From: EnderDude67 Date: Tue, 24 May 2022 20:02:36 -0500 Subject: [PATCH 10/14] Revert "Moved files to /common" This reverts commit 80eb6dbed8836278ac0dc70e27f306695eff8530. --- vision/camera_distances.py | 102 ++++++++++++++ vision/coordinate_lengths.py | 63 +++++++++ vision/deskew.py | 98 +++++++++++++ vision/vector_utils.py | 261 +++++++++++++++++++++++++++++++++++ 4 files changed, 524 insertions(+) create mode 100644 vision/camera_distances.py create mode 100644 vision/coordinate_lengths.py create mode 100644 vision/deskew.py create mode 100644 vision/vector_utils.py diff --git a/vision/camera_distances.py b/vision/camera_distances.py new file mode 100644 index 0000000..fc6f8fa --- /dev/null +++ b/vision/camera_distances.py @@ -0,0 +1,102 @@ +"""Functions for calculating locations of objects in an image""" + +from typing import Tuple, List + +import numpy as np +import numpy.typing as npt + +import coordinate_lengths +from vector_utils import pixel_intersect + + +def get_coordinates( + pixel: Tuple[int, int], + image_shape: Tuple[int, int, int], + focal_length: float, + rotation_deg: List[float], + drone_coordinates: List[float], + altitude_m: float, +) -> Tuple[float, float]: + """ + Calculates the 
coordinates of the given pixel + + Parameters + ---------- + pixel: Tuple[int, int] + The coordinates of the pixel in [Y, X] form + image_shape : Tuple[int, int, int] + The shape of the image (returned by `image.shape` when image is a numpy image array) + focal_length : float + The camera's focal length + rotation_deg: List[float] + The rotation of the drone/camera. The ROTATION_OFFSET in vector_utils.py will be applied + after. + drone_coordinates: List[float] + The coordinates of the drone in degrees of (latitude, longitude) + altitude_m: float + The altitude of the drone in meters + Returns + ------- + pixel_coordinates : Tuple[float, float] + The (latitude, longitude) coordinates of the pixel in degrees + """ + # Calculate the latitude and longitude lengths (in meters) + latitude_length: float = coordinate_lengths.latitude_length(drone_coordinates[0]) + longitude_length: float = coordinate_lengths.longitude_length(drone_coordinates[0]) + + # Find the pixel's intersect with the ground to get the location relative to the drone + intersect: npt.NDArray[np.float64] = pixel_intersect( + pixel, image_shape, focal_length, rotation_deg, altitude_m + ) + + # Invert the X axis so that the longitude is correct + intersect[1] *= -1 + + # Convert the location to latitude and longitude and add it to the drone's coordinates + pixel_lat = drone_coordinates[0] + intersect[0] / latitude_length + pixel_lon = drone_coordinates[1] + intersect[1] / longitude_length + + return pixel_lat, pixel_lon + + +def calculate_distance( + pixel1: Tuple[int, int], + pixel2: Tuple[int, int], + image_shape: Tuple[int, int, int], + focal_length: float, + rotation_deg: List[float], + altitude: float, +) -> float: + """ + Calculates the physical distance between two points on the ground represented by pixel + locations. Units of `distance` are the same as the units of `altitude` + + Parameters + ---------- + pixel1, pixel2: Tuple[int, int] + The two input pixel locations in [Y,X] form. 
The distance between them will be calculated + image_shape : Tuple[int, int, int] + The shape of the image (returned by `image.shape` when image is a numpy image array) + focal_length : float + The camera's focal length + rotation_deg : List[float] + The [roll, pitch, yaw] rotation in degrees + altitude: float + The altitude of the drone in any units. If an altitude is given, the units of the output + will be the units of the input. + Returns + ------- + distance : float + The distance between the two pixels. Units are the same units as `altitude` + """ + intersect1: npt.NDArray[np.float64] = pixel_intersect( + pixel1, image_shape, focal_length, rotation_deg, altitude + ) + intersect2: npt.NDArray[np.float64] = pixel_intersect( + pixel2, image_shape, focal_length, rotation_deg, altitude + ) + + # Calculate the distance between the two intersects + distance: float = float(np.linalg.norm(intersect1 - intersect2)) + + return distance diff --git a/vision/coordinate_lengths.py b/vision/coordinate_lengths.py new file mode 100644 index 0000000..01c78d5 --- /dev/null +++ b/vision/coordinate_lengths.py @@ -0,0 +1,63 @@ +"""Functions for calculating coordinate degree lengths""" + +import numpy as np + + +def latitude_length(latitude: float) -> float: + """ + Returns the distance in meters of one degree of latitude at a particular longitude + + Parameter + --------- + latitude : float + The latitude in degrees + + Returns + ------- + latitude_length + The length of a degree of latitude in meters at the given latitude + + References + ---------- + https://en.wikipedia.org/wiki/Geographic_coordinate_system#Length_of_a_degree + """ + + # Convert to radians for trig functions + latitude = np.deg2rad(latitude) + + distance: float = ( + 111132.92 + - 559.82 * np.cos(2 * latitude) + + 1.175 * np.cos(4 * latitude) + - 0.0023 * np.cos(6 * latitude) + ) + + return distance + + +def longitude_length(latitude: float) -> float: + """ + Calculates the distance in meters of one degree of 
longitude at that longitude + + Parameter + --------- + latitude : float + The latitude in degrees + + Returns + ------- + longitude_length + The length of a degree of longitude in meters at the given latitude + References + ---------- + https://en.wikipedia.org/wiki/Geographic_coordinate_system#Length_of_a_degree + """ + + # Convert degrees to radians for trig functions + latitude = np.deg2rad(latitude) + + distance: float = ( + 111412.84 * np.cos(latitude) - 93.5 * np.cos(3 * latitude) + 0.118 * np.cos(5 * latitude) + ) + + return distance diff --git a/vision/deskew.py b/vision/deskew.py new file mode 100644 index 0000000..71c0d88 --- /dev/null +++ b/vision/deskew.py @@ -0,0 +1,98 @@ +"""Distorts an image to generate an overhead view of the photo.""" + +from typing import List, Tuple, Optional + +import cv2 +import numpy as np +import numpy.typing as npt + +from vector_utils import pixel_intersect + + +def deskew( + image: npt.NDArray[np.uint8], + focal_length: float, + rotation_deg: List[float], + scale: float = 1, + interpolation: Optional[int] = cv2.INTER_LINEAR, +) -> Tuple[npt.NDArray[np.uint8], npt.NDArray[np.float64]]: + """ + Distorts an image to generate an overhead view of the photo. Parts of the image will be + completely black where the camera could not see. + + Parameters + ---------- + image : npt.NDArray[np.uint8] + The input image to deskew. Aspect ratio should match the camera sensor + focal_length : float + The camera's focal length - used to generate the camera's fields of view + rotation_deg : List[float] + The [roll, pitch, yaw] rotation in degrees + scale: Optional[float] + Scales the resolution of the output. A value of 1 makes the area inside the camera view + equal to the original image. Defaults to 1. + interpolation: Optional[int] + The cv2 interpolation type to be used when deskewing. 
+ Returns + ------- + (deskewed_image, corner_points) : Tuple[npt.NDArray[np.uint8], npt.NDArray[np.float64]] + deskewed_image : npt.NDArray[np.uint8] + The deskewed image - the image is flattened with black areas in the margins + corner_points : + The corner points of the result in the image. + Points are in order based on their location in the original image. + Format is: (top left, top right, bottom right, bottom left), or + 1--2 + | | + 4--3 + """ + orig_height: int + orig_width: int + orig_height, orig_width, _ = image.shape + + # Generate points in the format + # 1--2 + # | | + # 4--3 + + src_pts: npt.NDArray[np.float32] = np.array( + [[0, 0], [orig_width, 0], [orig_width, orig_height], [0, orig_height]], dtype=np.float32 + ) + intersects: npt.NDArray[np.float32] = np.array( + [ + pixel_intersect(point, image.shape, focal_length, rotation_deg, 1) + for point in np.flip(src_pts, axis=1) # use np.flip to convert XY to YX + ], + dtype=np.float32, + ) + + # Flip the endpoints over the X axis (top left is 0,0 for images) + intersects[:, 1] *= -1 + + # Subtract the minimum on both axes so the minimum values on each axis are 0 + intersects -= np.min(intersects, axis=0) + + # Find the area using cv2 contour tools + area: float = cv2.contourArea(intersects) + + # Scale the output so the area of the important pixels is about the same as the starting image + target_area: float = float(image.shape[0]) * (float(image.shape[1]) * scale) + intersect_scale: np.float64 = np.float64(np.sqrt(target_area / area)) + dst_pts: npt.NDArray[np.float64] = intersects * intersect_scale + + dst_pts = np.round(dst_pts) + + matrix: npt.NDArray[np.float64] = cv2.getPerspectiveTransform(src_pts, dst_pts) + + result_height: int = int(np.max(dst_pts[:, 1])) + 1 + result_width: int = int(np.max(dst_pts[:, 0])) + 1 + + result: npt.NDArray[np.uint8] = cv2.warpPerspective( + image, + matrix, + (result_width, result_height), + flags=interpolation, + borderMode=cv2.BORDER_TRANSPARENT, + ) + + return 
result, dst_pts.astype(np.int32) diff --git a/vision/vector_utils.py b/vision/vector_utils.py new file mode 100644 index 0000000..372e797 --- /dev/null +++ b/vision/vector_utils.py @@ -0,0 +1,261 @@ +"""Functions that use vectors to calculate camera intersections with the ground""" + +from typing import List, Tuple +import numpy.typing as npt + +import numpy as np +from scipy.spatial.transform import Rotation + +# Sony RX100 VII sensor size +SENSOR_WIDTH = 13.2 +SENSOR_HEIGHT = 8.8 + +# The rotation offset of the camera to the drone. The offset is applied in pixel_intersect +# Set to [0.0, -90.0, 0.0] when the camera is facing directly downwards +ROTATION_OFFSET = [0.0, 0.0, 0.0] + + +def pixel_intersect( + pixel: Tuple[int, int], + image_shape: Tuple[int, ...], + focal_length: float, + rotation_deg: List[float], + height: float, +) -> npt.NDArray[np.float64]: + """ + Finds the intersection [X,Y] of a given pixel with the ground relative to the camera. + A camera with no rotation points in the +X direction and is centered at [0, 0, height]. + + Parameters + ---------- + pixel : Tuple[int, int] + The coordinates of the pixel in [Y, X] form + image_shape : Tuple[int, int, int] + The shape of the image (returned by image.shape when image is a numpy image array) + focal_length : float + The camera's focal length + rotation_deg : List[float] + The [roll, pitch, yaw] rotation in degrees + height : float + The height that the image was taken at. The units of the output will be the units of the + input. + Returns + ------- + intersect : npt.NDArray[np.float64] + The coordinates [X,Y] where the pixel's vector intersects with the ground. 
+ """ + + # Create the normalized vector representing the direction of the given pixel + vector: npt.NDArray[np.float64] = pixel_vector(pixel, image_shape, focal_length) + + rotation_deg = np.deg2rad(rotation_deg).tolist() + + vector = euler_rotate(vector, rotation_deg) + + vector = euler_rotate(vector, ROTATION_OFFSET) + + intersect: npt.NDArray[np.float64] = plane_collision(vector, height) + + return intersect + + +def plane_collision( + ray_direction: npt.NDArray[np.float64], height: float +) -> npt.NDArray[np.float64]: + """ + Returns the point where a ray intersects the XY plane + + Parameters + ---------- + ray_direction : npt.NDArray[np.float64] + XYZ coordinates that represent the direction a ray faces from (0, 0, 0) + height : float + The Z coordinate for the starting height of the ray; can be any units + + Returns + ------- + intersect : npt.NDArray[np.float64] + The ray's intersection with the plane in [X,Y] format + + """ + # Find the "time" at which the line intersects the plane + # Line is defined as ray_direction * time + origin. + # Origin is the point at X, Y, Z = (0, 0, height) + + time: np.float64 = -height / ray_direction[2] + intersect: npt.NDArray[np.float64] = ray_direction[:2] * time + + return intersect + + +def pixel_vector( + pixel: Tuple[int, int], image_shape: Tuple[int, ...], focal_length: float +) -> npt.NDArray[np.float64]: + """ + Generates a vector representing the given pixel. + Pixels are in row-major form [Y, X] to match numpy indexing. 
+ + Parameters + ---------- + pixel : Tuple[int, int] + The coordinates of the pixel in [Y, X] form + image_shape : Tuple[int, int, int] + The shape of the image (returned by image.shape when image is a numpy image array) + focal_length : float + The camera's focal length - used to generate the camera's fields of view + + Returns + ------- + pixel_vector : npt.NDArray[np.float64] + The vector that represents the direction of the given pixel + """ + + # Find the FOVs using the focal length + fov_h: float + fov_v: float + fov_h, fov_v = focal_length_to_fovs(focal_length) + + return camera_vector( + pixel_angle(fov_h, pixel[1] / image_shape[1]), + pixel_angle(fov_v, pixel[0] / image_shape[0]), + ) + + +def pixel_angle(fov: float, ratio: float) -> float: + """ + Calculates a pixel's angle from the center of the camera on a single axis. Analogous to the + pixel's "fov" + + Only one component of the pixel is used here, call this function for each X and Y + + Parameters + ---------- + fov : float + The field of view of the camera in radians olong a given axis + ratio : float + The pixel's position as a ratio of the coordinate to the length of the image + Example: For an image that is 1080 pixels wide, a pixel at position 270 would have a + ratio of 0.25 + + Returns + ------- + angle : float + The pixel's angle from the center of the camera along a single axis + """ + return np.arctan(np.tan(fov / 2) * (1 - 2 * ratio)) + + +def focal_length_to_fovs(focal_length: float) -> Tuple[float, float]: + """ + Converts a given focal length to the horizontal and vertical fields of view in radians + + Uses SENSOR_WIDTH and SENSOR_HEIGHT, which are set to 13.2 and 8.8 respectively, the size of + the sensor in the Sony RX100 vii + + Parameters + ---------- + focal_length: float + The focal length of the camera in millimeters + Returns + ------- + fields_of_view : Tuple[float, float] + The fields of view in radians + Format is [horizontal, vertical] + """ + return get_fov(focal_length, 
SENSOR_WIDTH), get_fov(focal_length, SENSOR_HEIGHT) + + +def get_fov(focal_length: float, sensor_size: float) -> float: + """ + Converts a given focal length and sensor length to the corresponding field of view in radians + + Parameters + ---------- + focal_length : float + The focal length of the camera in millimeters + sensor_size: + The sensor size along one axis in millimeters + + Returns + ------- + fov : float + The field of view in radians + """ + + return 2 * np.arctan(sensor_size / (2 * focal_length)) + + +def camera_vector(h_angle: float, v_angle: float) -> npt.NDArray[np.float64]: + """ + Generates a vector with an angle h_angle with the horizontal and an angle v_angle with the + vertical. + + Using camera fovs will generate a vector that represents the corner of the camera's view. + + Parameters + ---------- + h_angle : float + The angle in radians to rotate horizontally + v_angle : float + The angle in radians to rotate vertically + Returns + ------- + camera_vector : npt.NDArray[np.float64] + The vector which represents a given location in an image + """ + + # Calculate the vertical rotation needed for the final vector to have the desired direction + edge: float = edge_angle(v_angle, h_angle) + + vector: npt.NDArray[np.float64] = np.array([1, 0, 0], dtype=np.float64) + return euler_rotate(vector, [0, edge, -h_angle]) + + +def edge_angle(v_angle: float, h_angle: float) -> float: + """ + Finds the angle such that rotating by edge_angle on the Y axis then rotating by h_angle on + the Z axis gives a vector an angle v_angle with the Y axis + + Can be derived using a square pyramid of height 1 + + Parameters + ---------- + v_angle : float + The vertical angle + h_angle : float + The horizontal angle + Returns + ------- + edge_angle : float + The angle to rotate vertically + """ + + return np.arctan(np.tan(v_angle) * np.cos(h_angle)) + + +def euler_rotate( + vector: npt.NDArray[np.float64], rotation_deg: List[float] +) -> npt.NDArray[np.float64]: + """ + 
Rotates a vector based on a given roll, pitch, and yaw. + + Follows the MAVSDK.EulerAngle convention - positive roll is banking to the right, positive + pitch is pitching nose up, positive yaw is clock-wise seen from above. + + Parameters + ---------- + vector: npt.NDArray[np.float64] + A vector represented by an XYZ coordinate that will be rotated + rotation_deg: List[float] + The [roll, pitch, yaw] rotation in radians + Returns + ------- + rotated_vector : npt.NDArray[np.float64] + The vector which has been rotated + """ + + # Reverse the Y and Z rotation to match MAVSDK convention + rotation_deg[1] *= -1 + rotation_deg[2] *= -1 + + return Rotation.from_euler("xyz", rotation_deg).apply(vector) From 5f8fedd2bb7891d681e7bc33c54c6c2a73d82827 Mon Sep 17 00:00:00 2001 From: EnderDude67 Date: Tue, 24 May 2022 20:05:53 -0500 Subject: [PATCH 11/14] Moved files to common --- vision/{ => common}/camera_distances.py | 0 vision/{ => common}/coordinate_lengths.py | 0 vision/{ => common}/deskew.py | 0 vision/{ => common}/vector_utils.py | 0 4 files changed, 0 insertions(+), 0 deletions(-) rename vision/{ => common}/camera_distances.py (100%) rename vision/{ => common}/coordinate_lengths.py (100%) rename vision/{ => common}/deskew.py (100%) rename vision/{ => common}/vector_utils.py (100%) diff --git a/vision/camera_distances.py b/vision/common/camera_distances.py similarity index 100% rename from vision/camera_distances.py rename to vision/common/camera_distances.py diff --git a/vision/coordinate_lengths.py b/vision/common/coordinate_lengths.py similarity index 100% rename from vision/coordinate_lengths.py rename to vision/common/coordinate_lengths.py diff --git a/vision/deskew.py b/vision/common/deskew.py similarity index 100% rename from vision/deskew.py rename to vision/common/deskew.py diff --git a/vision/vector_utils.py b/vision/common/vector_utils.py similarity index 100% rename from vision/vector_utils.py rename to vision/common/vector_utils.py From 
3189ecec0d602b3dae2ed95a361f2b06e9321996 Mon Sep 17 00:00:00 2001 From: EnderDude67 Date: Wed, 25 May 2022 20:52:08 -0500 Subject: [PATCH 12/14] Fixed import statements --- vision/common/camera_distances.py | 4 ++-- vision/common/deskew.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/vision/common/camera_distances.py b/vision/common/camera_distances.py index fc6f8fa..0799374 100644 --- a/vision/common/camera_distances.py +++ b/vision/common/camera_distances.py @@ -5,8 +5,8 @@ import numpy as np import numpy.typing as npt -import coordinate_lengths -from vector_utils import pixel_intersect +from vision.common import coordinate_lengths +from vision.common.vector_utils import pixel_intersect def get_coordinates( diff --git a/vision/common/deskew.py b/vision/common/deskew.py index 71c0d88..83b8457 100644 --- a/vision/common/deskew.py +++ b/vision/common/deskew.py @@ -6,7 +6,7 @@ import numpy as np import numpy.typing as npt -from vector_utils import pixel_intersect +from vision.common.vector_utils import pixel_intersect def deskew( From a965c7fcd42e20b5c1e0b71f75d3cae3b6842048 Mon Sep 17 00:00:00 2001 From: EnderDude67 Date: Wed, 25 May 2022 23:27:54 -0500 Subject: [PATCH 13/14] Added checking for invalid intersects --- vision/__init__.py | 0 vision/common/__init__.py | 0 vision/common/camera_distances.py | 35 ++++++++++++++++++++----------- vision/common/deskew.py | 33 ++++++++++++++++++++++++----- vision/common/vector_utils.py | 23 ++++++++++++-------- 5 files changed, 65 insertions(+), 26 deletions(-) create mode 100644 vision/__init__.py create mode 100644 vision/common/__init__.py diff --git a/vision/__init__.py b/vision/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/vision/common/__init__.py b/vision/common/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/vision/common/camera_distances.py b/vision/common/camera_distances.py index 0799374..319dc5d 100644 --- a/vision/common/camera_distances.py +++ 
b/vision/common/camera_distances.py @@ -1,12 +1,11 @@ """Functions for calculating locations of objects in an image""" -from typing import Tuple, List +from typing import Tuple, List, Optional import numpy as np import numpy.typing as npt -from vision.common import coordinate_lengths -from vision.common.vector_utils import pixel_intersect +from vision.common import coordinate_lengths, vector_utils def get_coordinates( @@ -16,9 +15,10 @@ def get_coordinates( rotation_deg: List[float], drone_coordinates: List[float], altitude_m: float, -) -> Tuple[float, float]: +) -> Optional[Tuple[float, float]]: """ - Calculates the coordinates of the given pixel + Calculates the coordinates of the given pixel. + Returns None if there is no valid intersect. Parameters ---------- @@ -37,18 +37,23 @@ def get_coordinates( The altitude of the drone in meters Returns ------- - pixel_coordinates : Tuple[float, float] - The (latitude, longitude) coordinates of the pixel in degrees + pixel_coordinates : Optional[Tuple[float, float]] + The (latitude, longitude) coordinates of the pixel in degrees. + + Equal to None if there is no valid intersect. 
""" # Calculate the latitude and longitude lengths (in meters) latitude_length: float = coordinate_lengths.latitude_length(drone_coordinates[0]) longitude_length: float = coordinate_lengths.longitude_length(drone_coordinates[0]) # Find the pixel's intersect with the ground to get the location relative to the drone - intersect: npt.NDArray[np.float64] = pixel_intersect( + intersect: Optional[npt.NDArray[np.float64]] = vector_utils.pixel_intersect( pixel, image_shape, focal_length, rotation_deg, altitude_m ) + if intersect is None: + return None + # Invert the X axis so that the longitude is correct intersect[1] *= -1 @@ -66,7 +71,7 @@ def calculate_distance( focal_length: float, rotation_deg: List[float], altitude: float, -) -> float: +) -> Optional[float]: """ Calculates the physical distance between two points on the ground represented by pixel locations. Units of `distance` are the same as the units of `altitude` @@ -86,16 +91,22 @@ def calculate_distance( will be the units of the input. Returns ------- - distance : float + distance : Optional[float] The distance between the two pixels. 
Units are the same units as `altitude` + + Returns None if one or both of the points did not have an intersection """ - intersect1: npt.NDArray[np.float64] = pixel_intersect( + intersect1: Optional[npt.NDArray[np.float64]] = vector_utils.pixel_intersect( pixel1, image_shape, focal_length, rotation_deg, altitude ) - intersect2: npt.NDArray[np.float64] = pixel_intersect( + intersect2: Optional[npt.NDArray[np.float64]] = vector_utils.pixel_intersect( pixel2, image_shape, focal_length, rotation_deg, altitude ) + # Checks if the intersects were valid + if intersect1 is None or intersect2 is None: + return None + # Calculate the distance between the two intersects distance: float = float(np.linalg.norm(intersect1 - intersect2)) diff --git a/vision/common/deskew.py b/vision/common/deskew.py index 83b8457..0824f54 100644 --- a/vision/common/deskew.py +++ b/vision/common/deskew.py @@ -15,11 +15,16 @@ def deskew( rotation_deg: List[float], scale: float = 1, interpolation: Optional[int] = cv2.INTER_LINEAR, -) -> Tuple[npt.NDArray[np.uint8], npt.NDArray[np.float64]]: +) -> Tuple[Optional[npt.NDArray[np.uint8]], Optional[npt.NDArray[np.float64]]]: """ Distorts an image to generate an overhead view of the photo. Parts of the image will be completely black where the camera could not see. + Image is assumed to be a 3:2 aspect ratio to match the drone camera. + + Returns (None, None) if the rotation and focal_length information does not generate a valid + ending location. + Parameters ---------- image : npt.NDArray[np.uint8] @@ -35,16 +40,26 @@ def deskew( The cv2 interpolation type to be used when deskewing. 
Returns ------- - (deskewed_image, corner_points) : Tuple[npt.NDArray[np.uint8], npt.NDArray[np.float64]] + (deskewed_image, corner_points) : Tuple[ + Optional[npt.NDArray[np.uint8]], + Optional[npt.NDArray[np.float64]] + ] deskewed_image : npt.NDArray[np.uint8] The deskewed image - the image is flattened with black areas in the margins - corner_points : + + Returns None if no valid image could be generated. + + corner_points : npt.NDArray[np.float64]] The corner points of the result in the image. Points are in order based on their location in the original image. Format is: (top left, top right, bottom right, bottom left), or 1--2 | | + 4--3 + + Returns None if no valid image could be generated. + """ orig_height: int orig_width: int @@ -54,18 +69,26 @@ def deskew( # 1--2 # | | # 4--3 - src_pts: npt.NDArray[np.float32] = np.array( [[0, 0], [orig_width, 0], [orig_width, orig_height], [0, orig_height]], dtype=np.float32 ) + + # Use the walrus operator to assign the result to `intersect` only if the result is not None intersects: npt.NDArray[np.float32] = np.array( [ - pixel_intersect(point, image.shape, focal_length, rotation_deg, 1) + intersect for point in np.flip(src_pts, axis=1) # use np.flip to convert XY to YX + if (intersect := pixel_intersect(point, image.shape, focal_length, rotation_deg, 1)) + is not None ], dtype=np.float32, ) + # Checks if the length of the list is less than 4. If it is, that means one of the points + # failed and no valid image can be generated. 
+ if intersects.shape[0] < 4: + return None, None + # Flip the endpoints over the X axis (top left is 0,0 for images) intersects[:, 1] *= -1 diff --git a/vision/common/vector_utils.py b/vision/common/vector_utils.py index 372e797..308657e 100644 --- a/vision/common/vector_utils.py +++ b/vision/common/vector_utils.py @@ -1,8 +1,7 @@ """Functions that use vectors to calculate camera intersections with the ground""" -from typing import List, Tuple +from typing import List, Tuple, Optional import numpy.typing as npt - import numpy as np from scipy.spatial.transform import Rotation @@ -21,7 +20,7 @@ def pixel_intersect( focal_length: float, rotation_deg: List[float], height: float, -) -> npt.NDArray[np.float64]: +) -> Optional[npt.NDArray[np.float64]]: """ Finds the intersection [X,Y] of a given pixel with the ground relative to the camera. A camera with no rotation points in the +X direction and is centered at [0, 0, height]. @@ -41,8 +40,9 @@ def pixel_intersect( input. Returns ------- - intersect : npt.NDArray[np.float64] + intersect : Optional[npt.NDArray[np.float64]] The coordinates [X,Y] where the pixel's vector intersects with the ground. + Returns None if there is no intersect. """ # Create the normalized vector representing the direction of the given pixel @@ -54,16 +54,17 @@ def pixel_intersect( vector = euler_rotate(vector, ROTATION_OFFSET) - intersect: npt.NDArray[np.float64] = plane_collision(vector, height) + intersect: Optional[npt.NDArray[np.float64]] = plane_collision(vector, height) return intersect def plane_collision( ray_direction: npt.NDArray[np.float64], height: float -) -> npt.NDArray[np.float64]: +) -> Optional[npt.NDArray[np.float64]]: """ Returns the point where a ray intersects the XY plane + Returns None if there is no intersect. 
Parameters ---------- @@ -74,8 +75,9 @@ def plane_collision( Returns ------- - intersect : npt.NDArray[np.float64] + intersect : Optional[npt.NDArray[np.float64]] The ray's intersection with the plane in [X,Y] format + Returns None if there is no intersect. """ # Find the "time" at which the line intersects the plane @@ -83,9 +85,12 @@ def plane_collision( # Origin is the point at X, Y, Z = (0, 0, height) time: np.float64 = -height / ray_direction[2] - intersect: npt.NDArray[np.float64] = ray_direction[:2] * time - return intersect + # Checks if the ray intersects with the plane + if np.isinf(time) or np.isnan(time) or time < 0: + return None + + return ray_direction[:2] * time def pixel_vector( From c91d7d2e983a3ea706ad2b81cea3c559e7f8fe9c Mon Sep 17 00:00:00 2001 From: EnderDude67 Date: Thu, 26 May 2022 19:16:27 -0500 Subject: [PATCH 14/14] Improved readability in deskew intersect check --- vision/common/deskew.py | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/vision/common/deskew.py b/vision/common/deskew.py index 0824f54..cd09a15 100644 --- a/vision/common/deskew.py +++ b/vision/common/deskew.py @@ -73,20 +73,17 @@ def deskew( [[0, 0], [orig_width, 0], [orig_width, orig_height], [0, orig_height]], dtype=np.float32 ) - # Use the walrus operator to assign the result to `intersect` only if the result is not None + # Numpy converts `None` to NaN intersects: npt.NDArray[np.float32] = np.array( [ - intersect + pixel_intersect(point, image.shape, focal_length, rotation_deg, 1) for point in np.flip(src_pts, axis=1) # use np.flip to convert XY to YX - if (intersect := pixel_intersect(point, image.shape, focal_length, rotation_deg, 1)) - is not None ], dtype=np.float32, ) - # Checks if the length of the list is less than 4. If it is, that means one of the points - # failed and no valid image can be generated. 
- if intersects.shape[0] < 4: + # Return (None, None) if any elements are NaN + if np.any(np.isnan(intersects)): return None, None # Flip the endpoints over the X axis (top left is 0,0 for images)