Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feature/image deskewing #84

Open
wants to merge 14 commits into
base: develop
Choose a base branch
from
Empty file added vision/__init__.py
Empty file.
Empty file added vision/common/__init__.py
Empty file.
113 changes: 113 additions & 0 deletions vision/common/camera_distances.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
"""Functions for calculating locations of objects in an image"""

from typing import Tuple, List, Optional

import numpy as np
import numpy.typing as npt

from vision.common import coordinate_lengths, vector_utils


def get_coordinates(
    pixel: Tuple[int, int],
    image_shape: Tuple[int, int, int],
    focal_length: float,
    rotation_deg: List[float],
    drone_coordinates: List[float],
    altitude_m: float,
) -> Optional[Tuple[float, float]]:
    """
    Convert a pixel location in an image to (latitude, longitude) coordinates.

    The pixel is projected onto the ground with `vector_utils.pixel_intersect`, and the
    resulting offset (in meters) is converted to degrees and added to the drone's position.

    Parameters
    ----------
    pixel : Tuple[int, int]
        The coordinates of the pixel in [Y, X] form
    image_shape : Tuple[int, int, int]
        The shape of the image (as returned by `image.shape` for a numpy image array)
    focal_length : float
        The camera's focal length
    rotation_deg : List[float]
        The rotation of the drone/camera. The ROTATION_OFFSET in vector_utils.py will be
        applied after.
    drone_coordinates : List[float]
        The coordinates of the drone in degrees of (latitude, longitude)
    altitude_m : float
        The altitude of the drone in meters

    Returns
    -------
    pixel_coordinates : Optional[Tuple[float, float]]
        The (latitude, longitude) coordinates of the pixel in degrees, or None when the
        pixel has no valid intersect with the ground.
    """
    # Meters per degree in each direction, both evaluated at the drone's latitude
    meters_per_deg_lat: float = coordinate_lengths.latitude_length(drone_coordinates[0])
    meters_per_deg_lon: float = coordinate_lengths.longitude_length(drone_coordinates[0])

    # Offset of the pixel's ground location relative to the drone
    ground_offset: Optional[npt.NDArray[np.float64]] = vector_utils.pixel_intersect(
        pixel, image_shape, focal_length, rotation_deg, altitude_m
    )
    if ground_offset is None:
        return None

    # The X axis has to be inverted for the longitude to come out correct
    ground_offset[1] = ground_offset[1] * -1

    # Turn the metric offset into degrees and shift the drone's position by it
    return (
        drone_coordinates[0] + ground_offset[0] / meters_per_deg_lat,
        drone_coordinates[1] + ground_offset[1] / meters_per_deg_lon,
    )


def calculate_distance(
    pixel1: Tuple[int, int],
    pixel2: Tuple[int, int],
    image_shape: Tuple[int, int, int],
    focal_length: float,
    rotation_deg: List[float],
    altitude: float,
) -> Optional[float]:
    """
    Measure the distance on the ground between the locations seen at two pixels.

    Both pixels are projected onto the ground with `vector_utils.pixel_intersect`, and the
    Euclidean distance between the two intersects is returned.

    Parameters
    ----------
    pixel1, pixel2 : Tuple[int, int]
        The two pixel locations in [Y, X] form
    image_shape : Tuple[int, int, int]
        The shape of the image (as returned by `image.shape` for a numpy image array)
    focal_length : float
        The camera's focal length
    rotation_deg : List[float]
        The [roll, pitch, yaw] rotation in degrees
    altitude : float
        The altitude of the drone, in any units. The result is in the same units.

    Returns
    -------
    distance : Optional[float]
        The distance between the two pixels, in the units of `altitude`, or None when one
        or both of the pixels did not have an intersection.
    """
    # Project both pixels (first pixel1, then pixel2) onto the ground
    ground_points: List[Optional[npt.NDArray[np.float64]]] = [
        vector_utils.pixel_intersect(px, image_shape, focal_length, rotation_deg, altitude)
        for px in (pixel1, pixel2)
    ]
    first, second = ground_points

    # Without two valid intersects there is nothing to measure
    if first is None or second is None:
        return None

    separation = first - second
    return float(np.linalg.norm(separation))
63 changes: 63 additions & 0 deletions vision/common/coordinate_lengths.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
"""Functions for calculating coordinate degree lengths"""

import numpy as np


def latitude_length(latitude: float) -> float:
    """
    Returns the distance in meters of one degree of latitude at the given latitude

    Parameter
    ---------
    latitude : float
        The latitude in degrees

    Returns
    -------
    latitude_length
        The length of a degree of latitude in meters at the given latitude

    References
    ----------
    https://en.wikipedia.org/wiki/Geographic_coordinate_system#Length_of_a_degree
    """

    # The cosine series below works on radians
    phi = np.deg2rad(latitude)

    # Individual terms of the series (see the reference above)
    second_order = 559.82 * np.cos(2 * phi)
    fourth_order = 1.175 * np.cos(4 * phi)
    sixth_order = 0.0023 * np.cos(6 * phi)

    meters_per_degree: float = 111132.92 - second_order + fourth_order - sixth_order
    return meters_per_degree


def longitude_length(latitude: float) -> float:
    """
    Calculates the distance in meters of one degree of longitude at the given latitude

    Parameter
    ---------
    latitude : float
        The latitude in degrees

    Returns
    -------
    longitude_length
        The length of a degree of longitude in meters at the given latitude

    References
    ----------
    https://en.wikipedia.org/wiki/Geographic_coordinate_system#Length_of_a_degree
    """

    # The cosine series below works on radians
    phi = np.deg2rad(latitude)

    # Individual terms of the series (see the reference above)
    first_order = 111412.84 * np.cos(phi)
    third_order = 93.5 * np.cos(3 * phi)
    fifth_order = 0.118 * np.cos(5 * phi)

    meters_per_degree: float = first_order - third_order + fifth_order
    return meters_per_degree
118 changes: 118 additions & 0 deletions vision/common/deskew.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,118 @@
"""Distorts an image to generate an overhead view of the photo."""

from typing import List, Tuple, Optional

import cv2
import numpy as np
import numpy.typing as npt

from vision.common.vector_utils import pixel_intersect


def deskew(
    image: npt.NDArray[np.uint8],
    focal_length: float,
    rotation_deg: List[float],
    scale: float = 1,
    interpolation: Optional[int] = cv2.INTER_LINEAR,
) -> Tuple[Optional[npt.NDArray[np.uint8]], Optional[npt.NDArray[np.int32]]]:
    """
    Distorts an image to generate an overhead view of the photo.

    The four corners of the image are projected onto the ground with `pixel_intersect`,
    and the image is warped so that its corners land on that (rescaled) ground footprint.
    Areas of the result outside the footprint are not written by the warp
    (`cv2.BORDER_TRANSPARENT`).

    Returns (None, None) if the rotation and focal_length information does not generate a
    valid ending location, i.e. if any corner has no ground intersect or the projected
    footprint has no area.

    Parameters
    ----------
    image : npt.NDArray[np.uint8]
        The input image to deskew, with shape (height, width, channels). Aspect ratio
        should match the camera sensor.
    focal_length : float
        The camera's focal length - used to generate the camera's fields of view
    rotation_deg : List[float]
        The [roll, pitch, yaw] rotation in degrees
    scale : float
        Scales the resolution of the output. A value of 1 makes the area inside the camera
        view equal to the area of the original image. Defaults to 1.
    interpolation : Optional[int]
        The cv2 interpolation type to be used when deskewing. None is treated the same as
        the default, `cv2.INTER_LINEAR`.

    Returns
    -------
    (deskewed_image, corner_points) : Tuple[
        Optional[npt.NDArray[np.uint8]],
        Optional[npt.NDArray[np.int32]]
    ]
        deskewed_image : npt.NDArray[np.uint8]
            The deskewed image - the image is flattened, with margins around the footprint.

            Returns None if no valid image could be generated.

        corner_points : npt.NDArray[np.int32]
            The corner points of the result in the image, as integer (x, y) pixel positions.
            Points are in order based on their location in the original image.
            Format is: (top left, top right, bottom right, bottom left), or
                1--2
                |  |
                4--3

            Returns None if no valid image could be generated.
    """
    orig_height: int
    orig_width: int
    orig_height, orig_width, _ = image.shape

    # Generate the source corners (in XY form) in the order
    # 1--2
    # |  |
    # 4--3
    src_pts: npt.NDArray[np.float32] = np.array(
        [[0, 0], [orig_width, 0], [orig_width, orig_height], [0, orig_height]], dtype=np.float32
    )

    # Project every corner onto the ground. An altitude of 1 is enough because the
    # footprint is rescaled to the target area further down.
    # Missing intersects are checked one by one: building an array from a mix of arrays
    # and `None` raises a ValueError instead of producing NaN.
    corner_hits: List[npt.NDArray[np.float64]] = []
    for point in np.flip(src_pts, axis=1):  # use np.flip to convert XY to YX
        hit = pixel_intersect(point, image.shape, focal_length, rotation_deg, 1)
        if hit is None:
            return None, None
        corner_hits.append(hit)

    intersects: npt.NDArray[np.float32] = np.array(corner_hits, dtype=np.float32)

    # Return (None, None) if any elements are NaN
    if np.any(np.isnan(intersects)):
        return None, None

    # Flip the endpoints over the X axis (top left is 0,0 for images)
    intersects[:, 1] *= -1

    # Subtract the minimum on both axes so the minimum values on each axis are 0
    intersects -= np.min(intersects, axis=0)

    # Find the area using cv2 contour tools
    area: float = cv2.contourArea(intersects)

    # A footprint without area cannot be scaled to the target area (division by zero)
    if area <= 0:
        return None, None

    # Scale the output so the area of the important pixels is about the same as the starting image
    target_area: float = float(orig_height) * (float(orig_width) * scale)
    intersect_scale: np.float64 = np.float64(np.sqrt(target_area / area))

    # cv2.getPerspectiveTransform only accepts float32 points; multiplying by a float64
    # scalar can promote the array to float64, so cast back explicitly.
    dst_pts: npt.NDArray[np.float32] = np.round(intersects * intersect_scale).astype(np.float32)

    matrix: npt.NDArray[np.float64] = cv2.getPerspectiveTransform(src_pts, dst_pts)

    # The output must be large enough to contain the furthest destination corner
    result_height: int = int(np.max(dst_pts[:, 1])) + 1
    result_width: int = int(np.max(dst_pts[:, 0])) + 1

    # `interpolation` is typed as Optional, so fall back to the default when it is None
    warp_flags: int = cv2.INTER_LINEAR if interpolation is None else interpolation

    result: npt.NDArray[np.uint8] = cv2.warpPerspective(
        image,
        matrix,
        (result_width, result_height),
        flags=warp_flags,
        borderMode=cv2.BORDER_TRANSPARENT,
    )

    return result, dst_pts.astype(np.int32)
Loading