diff --git a/vision/__init__.py b/vision/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/vision/common/__init__.py b/vision/common/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/vision/common/camera_distances.py b/vision/common/camera_distances.py
new file mode 100644
index 0000000..319dc5d
--- /dev/null
+++ b/vision/common/camera_distances.py
@@ -0,0 +1,113 @@
+"""Functions for calculating locations of objects in an image"""
+
+from typing import Tuple, List, Optional
+
+import numpy as np
+import numpy.typing as npt
+
+from vision.common import coordinate_lengths, vector_utils
+
+
+def get_coordinates(
+    pixel: Tuple[int, int],
+    image_shape: Tuple[int, int, int],
+    focal_length: float,
+    rotation_deg: List[float],
+    drone_coordinates: List[float],
+    altitude_m: float,
+) -> Optional[Tuple[float, float]]:
+    """
+    Calculates the coordinates of the given pixel.
+    Returns None if there is no valid intersect.
+
+    Parameters
+    ----------
+    pixel: Tuple[int, int]
+        The coordinates of the pixel in [Y, X] form
+    image_shape : Tuple[int, int, int]
+        The shape of the image (returned by `image.shape` when image is a numpy image array)
+    focal_length : float
+        The camera's focal length
+    rotation_deg: List[float]
+        The rotation of the drone/camera. The ROTATION_OFFSET in vector_utils.py will be applied
+        after.
+    drone_coordinates: List[float]
+        The coordinates of the drone in degrees of (latitude, longitude)
+    altitude_m: float
+        The altitude of the drone in meters
+    Returns
+    -------
+    pixel_coordinates : Optional[Tuple[float, float]]
+        The (latitude, longitude) coordinates of the pixel in degrees.
+
+        Equal to None if there is no valid intersect.
+    """
+    # Calculate the latitude and longitude lengths (in meters)
+    latitude_length: float = coordinate_lengths.latitude_length(drone_coordinates[0])
+    longitude_length: float = coordinate_lengths.longitude_length(drone_coordinates[0])
+
+    # Find the pixel's intersect with the ground to get the location relative to the drone
+    intersect: Optional[npt.NDArray[np.float64]] = vector_utils.pixel_intersect(
+        pixel, image_shape, focal_length, rotation_deg, altitude_m
+    )
+
+    if intersect is None:
+        return None
+
+    # Invert the Y axis so that the longitude is correct
+    intersect[1] *= -1
+
+    # Convert the location to latitude and longitude and add it to the drone's coordinates
+    pixel_lat = drone_coordinates[0] + intersect[0] / latitude_length
+    pixel_lon = drone_coordinates[1] + intersect[1] / longitude_length
+
+    return pixel_lat, pixel_lon
+
+
+def calculate_distance(
+    pixel1: Tuple[int, int],
+    pixel2: Tuple[int, int],
+    image_shape: Tuple[int, int, int],
+    focal_length: float,
+    rotation_deg: List[float],
+    altitude: float,
+) -> Optional[float]:
+    """
+    Calculates the physical distance between two points on the ground represented by pixel
+    locations. Units of `distance` are the same as the units of `altitude`
+
+    Parameters
+    ----------
+    pixel1, pixel2: Tuple[int, int]
+        The two input pixel locations in [Y,X] form. The distance between them will be calculated
+    image_shape : Tuple[int, int, int]
+        The shape of the image (returned by `image.shape` when image is a numpy image array)
+    focal_length : float
+        The camera's focal length
+    rotation_deg : List[float]
+        The [roll, pitch, yaw] rotation in degrees
+    altitude: float
+        The altitude of the drone in any units. The units of the output distance will match
+        the units of this altitude.
+    Returns
+    -------
+    distance : Optional[float]
+        The distance between the two pixels. Units are the same units as `altitude`
+
+        Returns None if one or both of the points did not have an intersection
+    """
+    intersect1: Optional[npt.NDArray[np.float64]] = vector_utils.pixel_intersect(
+        pixel1, image_shape, focal_length, rotation_deg, altitude
+    )
+    intersect2: Optional[npt.NDArray[np.float64]] = vector_utils.pixel_intersect(
+        pixel2, image_shape, focal_length, rotation_deg, altitude
+    )
+
+    # Checks if the intersects were valid
+    if intersect1 is None or intersect2 is None:
+        return None
+
+    # Calculate the distance between the two intersects
+    distance: float = float(np.linalg.norm(intersect1 - intersect2))
+
+    return distance
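
A minimal usage sketch of the two functions above. The image size, focal length, pose, and
coordinates below are illustrative assumptions, not values taken from this patch:

from vision.common import camera_distances

# Hypothetical inputs: a 720x1080 RGB image, a 10 mm focal length, the camera pitched 85
# degrees toward the ground, and a drone at 50 m over an example coordinate. This assumes
# ROTATION_OFFSET in vector_utils.py is left at its default of [0.0, 0.0, 0.0].
image_shape = (720, 1080, 3)
focal_length = 10.0
rotation_deg = [0.0, -85.0, 0.0]  # [roll, pitch, yaw] in degrees
drone_coordinates = [38.0, -92.0]  # (latitude, longitude) in degrees
altitude_m = 50.0

# Latitude/longitude of the center pixel (given in [Y, X] form); None if the ray misses the ground
center_coords = camera_distances.get_coordinates(
    (360, 540), image_shape, focal_length, rotation_deg, drone_coordinates, altitude_m
)

# Ground distance in meters between two pixels; None if either ray misses the ground
distance_m = camera_distances.calculate_distance(
    (100, 100), (600, 900), image_shape, focal_length, rotation_deg, altitude_m
)
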
diff --git a/vision/common/coordinate_lengths.py b/vision/common/coordinate_lengths.py
new file mode 100644
index 0000000..01c78d5
--- /dev/null
+++ b/vision/common/coordinate_lengths.py
@@ -0,0 +1,63 @@
+"""Functions for calculating coordinate degree lengths"""
+
+import numpy as np
+
+
+def latitude_length(latitude: float) -> float:
+    """
+    Returns the distance in meters of one degree of latitude at a particular latitude
+
+    Parameters
+    ----------
+    latitude : float
+        The latitude in degrees
+
+    Returns
+    -------
+    latitude_length : float
+        The length of a degree of latitude in meters at the given latitude
+
+    References
+    ----------
+    https://en.wikipedia.org/wiki/Geographic_coordinate_system#Length_of_a_degree
+    """
+
+    # Convert to radians for trig functions
+    latitude = np.deg2rad(latitude)
+
+    distance: float = (
+        111132.92
+        - 559.82 * np.cos(2 * latitude)
+        + 1.175 * np.cos(4 * latitude)
+        - 0.0023 * np.cos(6 * latitude)
+    )
+
+    return distance
+
+
+def longitude_length(latitude: float) -> float:
+    """
+    Calculates the distance in meters of one degree of longitude at a given latitude
+
+    Parameters
+    ----------
+    latitude : float
+        The latitude in degrees
+
+    Returns
+    -------
+    longitude_length : float
+        The length of a degree of longitude in meters at the given latitude
+    References
+    ----------
+    https://en.wikipedia.org/wiki/Geographic_coordinate_system#Length_of_a_degree
+    """
+
+    # Convert degrees to radians for trig functions
+    latitude = np.deg2rad(latitude)
+
+    distance: float = (
+        111412.84 * np.cos(latitude) - 93.5 * np.cos(3 * latitude) + 0.118 * np.cos(5 * latitude)
+    )
+
+    return distance
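
A quick sanity check of the series approximations above; the 38 degree latitude is an arbitrary
example value:

from vision.common import coordinate_lengths

# At 38 degrees latitude, one degree of latitude is roughly 111.0 km and one degree of
# longitude is roughly 87.8 km
lat_degree_m = coordinate_lengths.latitude_length(38.0)
lon_degree_m = coordinate_lengths.longitude_length(38.0)
print(f"1 deg latitude  ~ {lat_degree_m / 1000:.2f} km")
print(f"1 deg longitude ~ {lon_degree_m / 1000:.2f} km")
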
diff --git a/vision/common/deskew.py b/vision/common/deskew.py
new file mode 100644
index 0000000..cd09a15
--- /dev/null
+++ b/vision/common/deskew.py
@@ -0,0 +1,118 @@
+"""Distorts an image to generate an overhead view of the photo."""
+
+from typing import List, Tuple, Optional
+
+import cv2
+import numpy as np
+import numpy.typing as npt
+
+from vision.common.vector_utils import pixel_intersect
+
+
+def deskew(
+    image: npt.NDArray[np.uint8],
+    focal_length: float,
+    rotation_deg: List[float],
+    scale: float = 1,
+    interpolation: Optional[int] = cv2.INTER_LINEAR,
+) -> Tuple[Optional[npt.NDArray[np.uint8]], Optional[npt.NDArray[np.float64]]]:
+    """
+    Distorts an image to generate an overhead view of the photo. Parts of the image will be
+    completely black where the camera could not see.
+
+    Image is assumed to be a 3:2 aspect ratio to match the drone camera.
+
+    Returns (None, None) if the rotation and focal_length information does not generate a valid
+    ending location.
+
+    Parameters
+    ----------
+    image : npt.NDArray[np.uint8]
+        The input image to deskew. Aspect ratio should match the camera sensor
+    focal_length : float
+        The camera's focal length - used to generate the camera's fields of view
+    rotation_deg : List[float]
+        The [roll, pitch, yaw] rotation in degrees
+    scale : float
+        Scales the resolution of the output. A value of 1 makes the area inside the camera view
+        equal to the original image. Defaults to 1.
+    interpolation : Optional[int]
+        The cv2 interpolation type to be used when deskewing. Defaults to cv2.INTER_LINEAR.
+    Returns
+    -------
+    (deskewed_image, corner_points) : Tuple[
+        Optional[npt.NDArray[np.uint8]],
+        Optional[npt.NDArray[np.float64]]
+    ]
+        deskewed_image : npt.NDArray[np.uint8]
+            The deskewed image - the image is flattened with black areas in the margins
+
+            Returns None if no valid image could be generated.
+
+        corner_points : npt.NDArray[np.float64]
+            The corner points of the result in the image.
+            Points are in order based on their location in the original image.
+            Format is: (top left, top right, bottom right, bottom left), or
+            1--2
+            |  |
+            4--3
+
+            Returns None if no valid image could be generated.
+
+    """
+    orig_height: int
+    orig_width: int
+    orig_height, orig_width, _ = image.shape
+
+    # Generate points in the format
+    # 1--2
+    # |  |
+    # 4--3
+    src_pts: npt.NDArray[np.float32] = np.array(
+        [[0, 0], [orig_width, 0], [orig_width, orig_height], [0, orig_height]], dtype=np.float32
+    )
+
+    # Numpy converts `None` to NaN
+    intersects: npt.NDArray[np.float32] = np.array(
+        [
+            pixel_intersect(point, image.shape, focal_length, rotation_deg, 1)
+            for point in np.flip(src_pts, axis=1)  # use np.flip to convert XY to YX
+        ],
+        dtype=np.float32,
+    )
+
+    # Return (None, None) if any elements are NaN
+    if np.any(np.isnan(intersects)):
+        return None, None
+
+    # Flip the endpoints over the X axis (top left is 0,0 for images)
+    intersects[:, 1] *= -1
+
+    # Subtract the minimum on both axes so the minimum values on each axis are 0
+    intersects -= np.min(intersects, axis=0)
+
+    # Find the area using cv2 contour tools
+    area: float = cv2.contourArea(intersects)
+
+    # Scale the output so the area of the important pixels is about the same as the starting image
+    target_area: float = float(image.shape[0]) * (float(image.shape[1]) * scale)
+    intersect_scale: np.float64 = np.float64(np.sqrt(target_area / area))
+    dst_pts: npt.NDArray[np.float64] = intersects * intersect_scale
+
+    # Round and convert to float32, which cv2.getPerspectiveTransform expects
+    dst_pts = np.round(dst_pts).astype(np.float32)
+
+    matrix: npt.NDArray[np.float64] = cv2.getPerspectiveTransform(src_pts, dst_pts)
+
+    result_height: int = int(np.max(dst_pts[:, 1])) + 1
+    result_width: int = int(np.max(dst_pts[:, 0])) + 1
+
+    result: npt.NDArray[np.uint8] = cv2.warpPerspective(
+        image,
+        matrix,
+        (result_width, result_height),
+        flags=interpolation,
+        borderMode=cv2.BORDER_TRANSPARENT,
+    )
+
+    return result, dst_pts.astype(np.int32)
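
A hedged usage sketch for deskew. The image path and pose values are placeholders, not assets
or settings from this repository:

import cv2

from vision.common.deskew import deskew

# Placeholder capture: any 3:2 aerial photo taken with a mostly downward-facing camera
image = cv2.imread("example_capture.jpg")

deskewed, corners = deskew(image, focal_length=10.0, rotation_deg=[5.0, -80.0, 0.0])

if deskewed is None:
    # The camera view never hit the ground, so no overhead image could be generated
    print("No valid deskew for this orientation")
else:
    cv2.imwrite("example_capture_deskewed.jpg", deskewed)
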
diff --git a/vision/common/vector_utils.py b/vision/common/vector_utils.py
new file mode 100644
index 0000000..308657e
--- /dev/null
+++ b/vision/common/vector_utils.py
@@ -0,0 +1,266 @@
+"""Functions that use vectors to calculate camera intersections with the ground"""
+
+from typing import List, Tuple, Optional
+import numpy.typing as npt
+import numpy as np
+from scipy.spatial.transform import Rotation
+
+# Sony RX100 VII sensor size in millimeters
+SENSOR_WIDTH = 13.2
+SENSOR_HEIGHT = 8.8
+
+# The rotation offset of the camera to the drone in degrees. The offset is applied in
+# pixel_intersect. Set to [0.0, -90.0, 0.0] when the camera is facing directly downwards
+ROTATION_OFFSET = [0.0, 0.0, 0.0]
+
+
+def pixel_intersect(
+    pixel: Tuple[int, int],
+    image_shape: Tuple[int, ...],
+    focal_length: float,
+    rotation_deg: List[float],
+    height: float,
+) -> Optional[npt.NDArray[np.float64]]:
+    """
+    Finds the intersection [X,Y] of a given pixel with the ground relative to the camera.
+    A camera with no rotation points in the +X direction and is centered at [0, 0, height].
+
+    Parameters
+    ----------
+    pixel : Tuple[int, int]
+        The coordinates of the pixel in [Y, X] form
+    image_shape : Tuple[int, int, int]
+        The shape of the image (returned by image.shape when image is a numpy image array)
+    focal_length : float
+        The camera's focal length
+    rotation_deg : List[float]
+        The [roll, pitch, yaw] rotation in degrees
+    height : float
+        The height that the image was taken at. The units of the output will be the units of the
+        input.
+    Returns
+    -------
+    intersect : Optional[npt.NDArray[np.float64]]
+        The coordinates [X,Y] where the pixel's vector intersects with the ground.
+        Returns None if there is no intersect.
+    """
+
+    # Create the normalized vector representing the direction of the given pixel
+    vector: npt.NDArray[np.float64] = pixel_vector(pixel, image_shape, focal_length)
+
+    rotation_deg = np.deg2rad(rotation_deg).tolist()
+
+    vector = euler_rotate(vector, rotation_deg)
+
+    vector = euler_rotate(vector, np.deg2rad(ROTATION_OFFSET).tolist())
+
+    intersect: Optional[npt.NDArray[np.float64]] = plane_collision(vector, height)
+
+    return intersect
+
+
+def plane_collision(
+    ray_direction: npt.NDArray[np.float64], height: float
+) -> Optional[npt.NDArray[np.float64]]:
+    """
+    Returns the point where a ray intersects the XY plane
+    Returns None if there is no intersect.
+
+    Parameters
+    ----------
+    ray_direction : npt.NDArray[np.float64]
+        XYZ coordinates that represent the direction a ray faces from (0, 0, 0)
+    height : float
+        The Z coordinate for the starting height of the ray; can be any units
+
+    Returns
+    -------
+    intersect : Optional[npt.NDArray[np.float64]]
+        The ray's intersection with the plane in [X,Y] format
+        Returns None if there is no intersect.
+
+    """
+    # Find the "time" at which the line intersects the plane
+    # Line is defined as ray_direction * time + origin.
+    # Origin is the point at X, Y, Z = (0, 0, height)
+
+    time: np.float64 = -height / ray_direction[2]
+
+    # Checks if the ray intersects with the plane
+    if np.isinf(time) or np.isnan(time) or time < 0:
+        return None
+
+    return ray_direction[:2] * time
+
+
+def pixel_vector(
+    pixel: Tuple[int, int], image_shape: Tuple[int, ...], focal_length: float
+) -> npt.NDArray[np.float64]:
+    """
+    Generates a vector representing the given pixel.
+    Pixels are in row-major form [Y, X] to match numpy indexing.
+
+    Parameters
+    ----------
+    pixel : Tuple[int, int]
+        The coordinates of the pixel in [Y, X] form
+    image_shape : Tuple[int, int, int]
+        The shape of the image (returned by image.shape when image is a numpy image array)
+    focal_length : float
+        The camera's focal length - used to generate the camera's fields of view
+
+    Returns
+    -------
+    pixel_vector : npt.NDArray[np.float64]
+        The vector that represents the direction of the given pixel
+    """
+
+    # Find the FOVs using the focal length
+    fov_h: float
+    fov_v: float
+    fov_h, fov_v = focal_length_to_fovs(focal_length)
+
+    return camera_vector(
+        pixel_angle(fov_h, pixel[1] / image_shape[1]),
+        pixel_angle(fov_v, pixel[0] / image_shape[0]),
+    )
+
+
+def pixel_angle(fov: float, ratio: float) -> float:
+    """
+    Calculates a pixel's angle from the center of the camera on a single axis. Analogous to the
+    pixel's "fov"
+
+    Only one component of the pixel is used here, call this function for each X and Y
+
+    Parameters
+    ----------
+    fov : float
+        The field of view of the camera in radians along a given axis
+    ratio : float
+        The pixel's position as a ratio of the coordinate to the length of the image
+        Example: For an image that is 1080 pixels wide, a pixel at position 270 would have a
+        ratio of 0.25
+
+    Returns
+    -------
+    angle : float
+        The pixel's angle from the center of the camera along a single axis
+    """
+    return np.arctan(np.tan(fov / 2) * (1 - 2 * ratio))
+
+
+def focal_length_to_fovs(focal_length: float) -> Tuple[float, float]:
+    """
+    Converts a given focal length to the horizontal and vertical fields of view in radians
+
+    Uses SENSOR_WIDTH and SENSOR_HEIGHT, which are set to 13.2 and 8.8 respectively, the size of
+    the sensor in the Sony RX100 VII
+
+    Parameters
+    ----------
+    focal_length: float
+        The focal length of the camera in millimeters
+    Returns
+    -------
+    fields_of_view : Tuple[float, float]
+        The fields of view in radians
+        Format is [horizontal, vertical]
+    """
+    return get_fov(focal_length, SENSOR_WIDTH), get_fov(focal_length, SENSOR_HEIGHT)
+
+
+def get_fov(focal_length: float, sensor_size: float) -> float:
+    """
+    Converts a given focal length and sensor length to the corresponding field of view in radians
+
+    Parameters
+    ----------
+    focal_length : float
+        The focal length of the camera in millimeters
+    sensor_size : float
+        The sensor size along one axis in millimeters
+
+    Returns
+    -------
+    fov : float
+        The field of view in radians
+    """
+
+    return 2 * np.arctan(sensor_size / (2 * focal_length))
+
+
+def camera_vector(h_angle: float, v_angle: float) -> npt.NDArray[np.float64]:
+    """
+    Generates a vector with an angle h_angle with the horizontal and an angle v_angle with the
+    vertical.
+
+    Using camera fovs will generate a vector that represents the corner of the camera's view.
+
+    Parameters
+    ----------
+    h_angle : float
+        The angle in radians to rotate horizontally
+    v_angle : float
+        The angle in radians to rotate vertically
+    Returns
+    -------
+    camera_vector : npt.NDArray[np.float64]
+        The vector which represents a given location in an image
+    """
+
+    # Calculate the vertical rotation needed for the final vector to have the desired direction
+    edge: float = edge_angle(v_angle, h_angle)
+
+    vector: npt.NDArray[np.float64] = np.array([1, 0, 0], dtype=np.float64)
+    return euler_rotate(vector, [0, edge, -h_angle])
+
+
+def edge_angle(v_angle: float, h_angle: float) -> float:
+    """
+    Finds the angle such that rotating by edge_angle on the Y axis then rotating by h_angle on
+    the Z axis gives a vector an angle v_angle with the Y axis
+
+    Can be derived using a square pyramid of height 1
+
+    Parameters
+    ----------
+    v_angle : float
+        The vertical angle
+    h_angle : float
+        The horizontal angle
+    Returns
+    -------
+    edge_angle : float
+        The angle to rotate vertically
+    """
+
+    return np.arctan(np.tan(v_angle) * np.cos(h_angle))
+
+
+def euler_rotate(
+    vector: npt.NDArray[np.float64], rotation_deg: List[float]
+) -> npt.NDArray[np.float64]:
+    """
+    Rotates a vector based on a given roll, pitch, and yaw.
+
+    Follows the MAVSDK.EulerAngle convention - positive roll is banking to the right, positive
+    pitch is pitching nose up, positive yaw is clockwise seen from above.
+
+    Parameters
+    ----------
+    vector: npt.NDArray[np.float64]
+        A vector represented by an XYZ coordinate that will be rotated
+    rotation_deg: List[float]
+        The [roll, pitch, yaw] rotation in radians
+    Returns
+    -------
+    rotated_vector : npt.NDArray[np.float64]
+        The vector which has been rotated
+    """
+
+    # Reverse the Y and Z rotation to match the MAVSDK convention, copying the list so the
+    # caller's rotation is not modified in place
+    rotation: List[float] = [rotation_deg[0], -rotation_deg[1], -rotation_deg[2]]
+
+    return Rotation.from_euler("xyz", rotation).apply(vector)
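
A short sketch tying the vector_utils helpers together. The 9 mm focal length, image shape, pose,
and 30 m height below are illustrative assumptions:

import numpy as np

from vision.common import vector_utils

focal_length = 9.0  # millimeters, illustrative

# Horizontal and vertical fields of view in radians for the configured RX100 VII sensor size
fov_h, fov_v = vector_utils.focal_length_to_fovs(focal_length)
print(np.rad2deg(fov_h), np.rad2deg(fov_v))  # roughly 72.5 and 52.1 degrees

# With the drone pitched straight down (and ROTATION_OFFSET left at zero), the center pixel
# of a 720x1080 image intersects the ground directly below the camera
intersect = vector_utils.pixel_intersect(
    (360, 540), (720, 1080, 3), focal_length, [0.0, -90.0, 0.0], height=30.0
)
print(intersect)  # approximately [0.0, 0.0]; None would mean the ray missed the ground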