From 77eec77482d96678a6716c06e1d10f57b448d69a Mon Sep 17 00:00:00 2001 From: Simon Jones Date: Thu, 25 Apr 2024 10:56:47 -0400 Subject: [PATCH] feat: setting up github actions to run on every push --- .github/workflows/main.yml | 8 ++++---- abstract.md | 24 ++++++++++++------------ thesis.md | 12 +++++++++++- 3 files changed, 27 insertions(+), 17 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 0767c07..909a4e3 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -1,8 +1,6 @@ name: Release Senior Thesis -on: - push: - tags: - - '*.*.*' +on: [push] + jobs: publish: runs-on: ubuntu-latest @@ -37,6 +35,7 @@ jobs: - name: Create release id: create_release uses: actions/create-release@v1 + if: startsWith(github.ref, 'refs/tags/') env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} with: @@ -47,6 +46,7 @@ jobs: - name: Upload released asset id: upload-release-asset uses: actions/upload-release-asset@v1 + if: startsWith(github.ref, 'refs/tags/') env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} with: diff --git a/abstract.md b/abstract.md index f4d8cd7..502049b 100644 --- a/abstract.md +++ b/abstract.md @@ -5,15 +5,15 @@ surroundings in a real world environment, and it is necessary to realize technologies such as fully autonomous unmanned aerial vehicles (UAVs) and land vehicles. Reinforcement Learning (RL) has proven to be a novel and effective method for autonomous navigation and control, as it is capable of optimizing a -method of converting its instantaneous state to an action at a point in time -[@gugan2023; @song2023; @doukhi2022]. Here we use a Deep Deterministic Policy -Gradient (DDPG) RL algorithm to train the COEX Clover quadcopter system to -perform autonomous navigation. With the advent of solid state lasers, -miniaturized optical ranging systems have become ubiquitous for aerial robotics -because of their low power and accuracy [@raj2020]. By equipping the Clover with -ten Time of Flight (ToF) ranging sensors, we supply continuous spatial data in -combination with inertial data to determine the quadcopter's state, which is -then mapped to its control output. Our results suggest that, while the DDPG -algorithm is capable of training a quadcopter system for autonomous navigation, -its computation-heavy nature leads to delayed convergence, and relying on -discretized algorithms may permit more rapid convergence across episodes. +method of converting its instantaneous state to an action at a point in time. +Here we use a Deep Deterministic Policy Gradient (DDPG) RL algorithm to train +the COEX Clover quadcopter system to perform autonomous navigation. With the +advent of solid state lasers, miniaturized optical ranging systems have become +ubiquitous for aerial robotics because of their low power and accuracy. By +equipping the Clover with ten Time of Flight (ToF) ranging sensors, we supply +continuous spatial data in combination with inertial data to determine the +quadcopter's state, which is then mapped to its control output. Our results +suggest that, while the DDPG algorithm is capable of training a quadcopter +system for autonomous navigation, its computation-heavy nature leads to delayed +convergence, and relying on discretized algorithms may permit more rapid +convergence across episodes. diff --git a/thesis.md b/thesis.md index 53433ee..c7e3bc0 100644 --- a/thesis.md +++ b/thesis.md @@ -94,6 +94,8 @@ using simpler, more economically affordable sensors can enable a quadcopter to fly in a GPS-denied environment without the use of LiDAR, which is typically an order of magnitude more expensive. + + ## Ethical Implications ### Civilian Use @@ -424,7 +426,7 @@ a case for the expected adaptability of a DDPG algorithm in curriculum learning. Because both PPO and DDPG are model-free algorithms with continuous state and action spaces, we expect similar levels of aptness for curriculum learning. -# Method of approach +# Method of Approach This project uses the Copter Express (COEX) Clover quadcopter platform, equipped with Time of Flight (ToF) ranging sensors, and applies a Deep Deterministic @@ -944,6 +946,10 @@ the number of episodes increases. ## Theory + + ### Deep Reinforcement Learning As stated, this project uses a Deep RL algorithm known as the Deep Deterministic @@ -1647,6 +1653,10 @@ order of days or weeks. ![Episodic duration versus episode number for run 1, whose training results are displayed in {+@fig:plot1}.](images/plots/plot-episode-duration.png){#fig:plot-episode-duration width=100%} +### Threats to Validity + + + # Future Work The results of this project suggest the need for more extensive training using