diff --git a/.github/_clips_0601_1494452613491980502_20.jpg b/.github/_clips_0601_1494452613491980502_20.jpg new file mode 100644 index 0000000..086ee51 Binary files /dev/null and b/.github/_clips_0601_1494452613491980502_20.jpg differ diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..a1a77f6 --- /dev/null +++ b/.gitignore @@ -0,0 +1,25 @@ +work_dirs/ +predicts/ +output/ +data/ + +__pycache__/ +*/*.un~ +.*.swp + + + +*.egg-info/ +*.egg + +output.txt +.vscode/* +.DS_Store +tmp.* +*.pt +*.pth +*.un~ + +lane +debug +pretrained_models diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..8df642d --- /dev/null +++ b/LICENSE @@ -0,0 +1,201 @@ +Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. 
For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. 
The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+
+   END OF TERMS AND CONDITIONS
+
+   APPENDIX: How to apply the Apache License to your work.
+
+      To apply the Apache License to your work, attach the following
+      boilerplate notice, with the fields enclosed by brackets "[]"
+      replaced with your own identifying information. (Don't include
+      the brackets!) The text should be enclosed in the appropriate
+      comment syntax for the file format. We also recommend that a
+      file or class name and description of purpose be included on the
+      same "printed page" as the copyright notice for easier
+      identification within third-party archives.
+
+   Copyright 2021 Tu Zheng
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
\ No newline at end of file
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..57b92bc
--- /dev/null
+++ b/README.md
@@ -0,0 +1,161 @@
+# LaneDet
+## Introduction
+LaneDet is an open-source lane detection toolbox based on PyTorch that aims to pull together a wide variety of state-of-the-art lane detection models. Developers can reproduce these SOTA methods and build their own methods.
+
+![demo image](.github/_clips_0601_1494452613491980502_20.jpg)
+
+## Table of Contents
+* [Introduction](#introduction)
+* [Benchmark and model zoo](#benchmark-and-model-zoo)
+* [Installation](#installation)
+* [Getting Started](#getting-started)
+* [Contributing](#contributing)
+* [Licenses](#licenses)
+* [Acknowledgement](#acknowledgement)
+
+## Benchmark and model zoo
+Supported backbones:
+- [x] ResNet
+- [x] ERFNet
+- [x] VGG
+- [ ] DLA (coming soon)
+
+Supported detectors:
+- [x] [SCNN](configs/scnn)
+- [x] [UFLD](configs/ufld)
+- [x] [RESA](configs/resa)
+- [ ] LaneATT (coming soon)
+- [ ] LaneAF (coming soon)
+
+## Installation
+
+### Clone this repository
+```
+git clone https://github.com/turoad/lanedet.git
+```
+We refer to this directory as `$LANEDET_ROOT`.
+
+### Create a conda virtual environment and activate it (conda is optional)
+
+```Shell
+conda create -n lanedet python=3.8 -y
+conda activate lanedet
+```
+
+### Install dependencies
+
+```Shell
+# Install PyTorch first; the cudatoolkit version should match the CUDA version on your system. (You can also use pip to install pytorch and torchvision.)
+conda install pytorch torchvision cudatoolkit=10.1 -c pytorch
+
+# Or you can install via pip
+pip install torch torchvision
+
+# Install python packages
+pip install -r requirements.txt
+```
+
+### Data preparation
+
+#### CULane
+
+Download [CULane](https://xingangpan.github.io/projects/CULane.html). Then extract it to `$CULANEROOT` and create a link to the `data` directory.
+
+```Shell
+cd $LANEDET_ROOT
+mkdir -p data
+ln -s $CULANEROOT data/CULane
+```
+
+For CULane, you should have a structure like this:
+```
+$CULANEROOT/driver_xx_xxframe    # data folders x6
+$CULANEROOT/laneseg_label_w16    # lane segmentation labels
+$CULANEROOT/list                 # data lists
+```
+
+#### Tusimple
+Download [Tusimple](https://github.com/TuSimple/tusimple-benchmark/issues/3). Then extract it to `$TUSIMPLEROOT` and create a link to the `data` directory.
+
+```Shell
+cd $LANEDET_ROOT
+mkdir -p data
+ln -s $TUSIMPLEROOT data/tusimple
+```
+
+For Tusimple, you should have a structure like this:
+```
+$TUSIMPLEROOT/clips                   # data folders
+$TUSIMPLEROOT/label_data_xxxx.json    # label json files x4
+$TUSIMPLEROOT/test_tasks_0627.json    # test tasks json file
+$TUSIMPLEROOT/test_label.json         # test label json file
+```
+
+Tusimple does not provide segmentation annotations, so we need to generate them from the JSON annotations.
+
+```Shell
+python scripts/generate_seg_tusimple.py --root $TUSIMPLEROOT
+# this will generate the seg_label directory
+```
+
+## Getting Started
+### Training
+
+For training, run
+
+```Shell
+python main.py [configs/path_to_your_config] --gpus [gpu_ids]
+```
+
+For example, run
+```Shell
+python main.py configs/resa/resa50_culane.py --gpus 0 1 2 3
+```
+
+### Testing
+For testing, run
+```Shell
+python main.py [configs/path_to_your_config] --validate --load_from [path_to_your_model] --gpus [gpu_ids]
+```
+
+For example, run
+```Shell
+python main.py configs/resa/resa50_culane.py --validate --load_from culane_resnet50.pth --gpus 0 1 2 3
+```
+
+For visualization, just add `--view`.
+
+## Contributing
+We appreciate all contributions to improve LaneDet. Any pull requests or issues are welcome.
+
+## Licenses
+This project is released under the [Apache 2.0 license](LICENSE).
+
+## Acknowledgement
+
+* [open-mmlab/mmdetection](https://github.com/open-mmlab/mmdetection)
+* [pytorch/vision](https://github.com/pytorch/vision)
+* [cardwing/Codes-for-Lane-Detection](https://github.com/cardwing/Codes-for-Lane-Detection)
+* [XingangPan/SCNN](https://github.com/XingangPan/SCNN)
+* [ZJULearning/resa](https://github.com/ZJULearning/resa)
+* [cfzd/Ultra-Fast-Lane-Detection](https://github.com/cfzd/Ultra-Fast-Lane-Detection)
diff --git a/configs/resa/README.md b/configs/resa/README.md
new file mode 100644
index 0000000..cf16e72
--- /dev/null
+++ b/configs/resa/README.md
@@ -0,0 +1,17 @@
+# RESA: Recurrent Feature-Shift Aggregator for Lane Detection
+
+## Introduction
+
+```latex
+@misc{zheng2020resa,
+    title={RESA: Recurrent Feature-Shift Aggregator for Lane Detection},
+    author={Tu Zheng and Hao Fang and Yi Zhang and Wenjian Tang and Zheng Yang and Haifeng Liu and Deng Cai},
+    year={2020},
+    eprint={2008.13719},
+    archivePrefix={arXiv},
+    primaryClass={cs.CV}
+}
+```
+
+## Models
+coming soon
diff --git a/configs/resa/resa18_tusimple.py b/configs/resa/resa18_tusimple.py
new file mode 100644
index 0000000..d7f5552
--- /dev/null
+++ b/configs/resa/resa18_tusimple.py
@@ -0,0 +1,92 @@
+net = dict(
+    type='Segmentor',
+)
+
+backbone = dict(
+    type='ResNetWrapper',
+    resnet='resnet18',
+    pretrained=True,
+    replace_stride_with_dilation=[False, True, True],
+    out_conv=True,
+)
+featuremap_out_channel = 128
+featuremap_out_stride = 8
+
+aggregator = dict(
+    type='RESA',
+    direction=['d', 'u', 'r', 'l'],
+    alpha=2.0,
+    iter=4,
+    conv_stride=9,
+)
+
+heads = [
+    dict(type='BUSD'),
+    dict(type='ExistHead'),
+]
+
+trainer = dict(
+    type='RESA'
+)
+
+evaluator = dict(
+    type='Tusimple',
+)
+
+optimizer = dict(
+  type = 'sgd',
+  lr = 0.025,
+  weight_decay = 1e-4,
+  momentum = 0.9
+)
+
+epochs = 100
+batch_size = 8
+total_iter = (3616 // batch_size + 1) * epochs
+import math
+scheduler = dict(
+    type = 'LambdaLR',
+    lr_lambda = lambda _iter : math.pow(1 - _iter/total_iter, 0.9)
+)
+
+bg_weight = 0.4
+
+img_norm = dict(
+    mean=[103.939, 116.779, 123.68],
+    std=[1., 1., 1.]
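+    # (the means above are the standard Caffe/VGG ImageNet channel means in BGR order;
+    # std of 1 keeps inputs on the raw 0-255 pixel scale)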
+) + +img_height = 368 +img_width = 640 +cut_height = 160 +seg_label = "seg_label6" + +dataset_path = './data/tusimple' +dataset = dict( + train=dict( + type='TuSimple', + img_path=dataset_path, + data_list='train_val_gt.txt' + ), + val=dict( + type='TuSimple', + img_path=dataset_path, + data_list='test_gt.txt' + ), + test=dict( + type='TuSimple', + img_path=dataset_path, + data_list='test_gt.txt' + ) +) + + +batch_size = 8 +workers = 12 +num_classes = 6 + 1 +ignore_label = 255 +log_interval = 100 +eval_ep = 1 +save_ep = epochs +test_json_file='data/tusimple/test_label.json' diff --git a/configs/resa/resa34_tusimple.py b/configs/resa/resa34_tusimple.py new file mode 100644 index 0000000..3ee666d --- /dev/null +++ b/configs/resa/resa34_tusimple.py @@ -0,0 +1,90 @@ +net = dict( + type='Segmentor', +) + +backbone = dict( + type='ResNetWrapper', + resnet='resnet34', + pretrained=True, + replace_stride_with_dilation=[False, True, True], + out_conv=True, +) +featuremap_out_channel = 128 +featuremap_out_stride = 8 + +aggregator = dict( + type='RESA', + direction=['d', 'u', 'r', 'l'], + alpha=2.0, + iter=4, + conv_stride=9, +) + +heads = [ + dict(type='BUSD'), + dict(type='ExistHead'), +] + +trainer = dict( + type='RESA' +) + +evaluator = dict( + type='Tusimple', +) + +optimizer = dict( + type = 'sgd', + lr = 0.025, + weight_decay = 1e-4, + momentum = 0.9 +) + +epochs = 100 +batch_size = 8 +total_iter = (3616 // batch_size + 1) * epochs +import math +scheduler = dict( + type = 'LambdaLR', + lr_lambda = lambda _iter : math.pow(1 - _iter/total_iter, 0.9) +) + +bg_weight = 0.4 + +img_norm = dict( + mean=[103.939, 116.779, 123.68], + std=[1., 1., 1.] +) + +img_height = 368 +img_width = 640 +cut_height = 160 +seg_label = "seg_label6" + +dataset_path = './data/tusimple' +dataset = dict( + train=dict( + type='TuSimple', + img_path=dataset_path, + data_list='train_val_gt.txt' + ), + val=dict( + type='TuSimple', + img_path=dataset_path, + data_list='test_gt.txt' + ), + test=dict( + type='TuSimple', + img_path=dataset_path, + data_list='test_gt.txt' + ) +) + + +workers = 12 +num_classes = 6 + 1 +ignore_label = 255 +log_interval = 100 +eval_ep = 1 +save_ep = epochs +test_json_file='data/tusimple/test_label.json' diff --git a/configs/resa/resa50_culane.py b/configs/resa/resa50_culane.py new file mode 100644 index 0000000..fa3a2e2 --- /dev/null +++ b/configs/resa/resa50_culane.py @@ -0,0 +1,92 @@ +net = dict( + type='Segmentor', +) + +backbone = dict( + type='ResNetWrapper', + resnet='resnet50', + pretrained=True, + replace_stride_with_dilation=[False, True, True], + out_conv=True, + in_channels=[64, 128, 256, 512] +) +featuremap_out_channel = 128 +featuremap_out_stride = 8 + +aggregator = dict( + type='RESA', + direction=['d', 'u', 'r', 'l'], + alpha=2.0, + iter=4, + conv_stride=9, +) + +heads = [ + dict(type='PlainDecoder'), + dict(type='ExistHead'), +] + +trainer = dict( + type='RESA' +) + +evaluator = dict( + type='CULane', +) + +optimizer = dict( + type = 'sgd', + lr = 0.030, + weight_decay = 1e-4, + momentum = 0.9 +) + +epochs = 12 +batch_size = 8 +total_iter = (88880 // batch_size) * epochs +import math +scheduler = dict( + type = 'LambdaLR', + lr_lambda = lambda _iter : math.pow(1 - _iter/total_iter, 0.9) +) + +seg_loss_weight = 1.0 +eval_ep = 6 +save_ep = epochs + +bg_weight = 0.4 + +img_norm = dict( + mean=[103.939, 116.779, 123.68], + std=[1., 1., 1.] 
+)
+
+img_height = 288
+img_width = 800
+cut_height = 240
+
+dataset_path = './data/CULane'
+dataset = dict(
+    train=dict(
+        type='CULane',
+        img_path=dataset_path,
+        data_list='train_gt.txt',
+    ),
+    val=dict(
+        type='CULane',
+        img_path=dataset_path,
+        data_list='test_img.txt',
+    ),
+    test=dict(
+        type='CULane',
+        img_path=dataset_path,
+        data_list='test_img.txt',
+    )
+)
+
+
+workers = 12
+num_classes = 4 + 1
+ignore_label = 255
+log_interval = 1000
+
diff --git a/configs/scnn/README.md b/configs/scnn/README.md
new file mode 100644
index 0000000..7b8212d
--- /dev/null
+++ b/configs/scnn/README.md
@@ -0,0 +1,16 @@
+# Spatial As Deep: Spatial CNN for Traffic Scene Understanding
+
+## Introduction
+
+```latex
+@inproceedings{pan2018SCNN,
+  author    = {Xingang Pan and Jianping Shi and Ping Luo and Xiaogang Wang and Xiaoou Tang},
+  title     = {Spatial As Deep: Spatial CNN for Traffic Scene Understanding},
+  booktitle = {AAAI Conference on Artificial Intelligence (AAAI)},
+  month     = {February},
+  year      = {2018}
+}
+```
+
+## Models
+coming soon
diff --git a/configs/scnn/scnn18_tusimple.py b/configs/scnn/scnn18_tusimple.py
new file mode 100644
index 0000000..ed38300
--- /dev/null
+++ b/configs/scnn/scnn18_tusimple.py
@@ -0,0 +1,86 @@
+net = dict(
+    type='Segmentor',
+)
+
+backbone = dict(
+    type='ResNetWrapper',
+    resnet='resnet18',
+    pretrained=True,
+    replace_stride_with_dilation=[False, True, True],
+    out_conv=True,
+)
+featuremap_out_channel = 128
+featuremap_out_stride = 8
+
+aggregator = dict(
+    type='SCNN',
+)
+
+heads = [
+    dict(type='BUSD'),
+    dict(type='ExistHead'),
+]
+
+trainer = dict(
+    type='RESA'
+)
+
+evaluator = dict(
+    type='Tusimple',
+)
+
+optimizer = dict(
+  type = 'sgd',
+  lr = 0.025,
+  weight_decay = 1e-4,
+  momentum = 0.9
+)
+
+epochs = 100
+batch_size = 8
+total_iter = (3616 // batch_size + 1) * epochs
+import math
+scheduler = dict(
+    type = 'LambdaLR',
+    lr_lambda = lambda _iter : math.pow(1 - _iter/total_iter, 0.9)
+)
+
+bg_weight = 0.4
+
+img_norm = dict(
+    mean=[103.939, 116.779, 123.68],
+    std=[1., 1., 1.]
+)
+
+img_height = 368
+img_width = 640
+cut_height = 160
+seg_label = "seg_label6"
+
+dataset_path = './data/tusimple'
+dataset = dict(
+    train=dict(
+        type='TuSimple',
+        img_path=dataset_path,
+        data_list='train_val_gt.txt'
+    ),
+    val=dict(
+        type='TuSimple',
+        img_path=dataset_path,
+        data_list='test_gt.txt'
+    ),
+    test=dict(
+        type='TuSimple',
+        img_path=dataset_path,
+        data_list='test_gt.txt'
+    )
+)
+
+
+workers = 12
+num_classes = 6 + 1
+ignore_label = 255
+log_interval = 100
+eval_ep = 1
+save_ep = epochs
+test_json_file='data/tusimple/test_label.json'
diff --git a/configs/scnn/vgg16_culane.py b/configs/scnn/vgg16_culane.py
new file mode 100644
index 0000000..07395ff
--- /dev/null
+++ b/configs/scnn/vgg16_culane.py
@@ -0,0 +1,85 @@
+net = dict(
+    type='Segmentor',
+)
+
+backbone = dict(
+    type='VGG',
+)
+featuremap_out_channel = 128
+featuremap_out_stride = 8
+
+aggregator = dict(
+    type='SCNN',
+)
+
+heads = [
+    dict(type='BUSD'),
+    dict(type='ExistHead'),
+]
+
+trainer = dict(
+    type='RESA'
+)
+
+evaluator = dict(
+    type='CULane',
+)
+
+optimizer = dict(
+    type='sgd',
+    lr=0.015,
+    weight_decay = 1e-4,
+    momentum = 0.9
+)
+
+epochs = 12
+batch_size = 8
+total_iter = (88880 // batch_size) * epochs
+import math
+scheduler = dict(
+    type = 'LambdaLR',
+    lr_lambda = lambda _iter : math.pow(1 - _iter/total_iter, 0.9)
+)
+
+seg_loss_weight = 2.0
+eval_ep = 6
+save_ep = epochs
+
+bg_weight = 0.4
+
+img_norm = dict(
+    mean=[103.939, 116.779, 123.68],
+    std=[1., 1., 1.]
+)
+
+img_height = 288
+img_width = 800
+cut_height = 240
+
+dataset_path = './data/CULane'
+dataset = dict(
+    train=dict(
+        type='CULane',
+        img_path=dataset_path,
+        data_list='train_gt.txt',
+    ),
+    val=dict(
+        type='CULane',
+        img_path=dataset_path,
+        data_list='test_img.txt',
+    ),
+    test=dict(
+        type='CULane',
+        img_path=dataset_path,
+        data_list='test_img.txt',
+    )
+)
+
+
+workers = 12
+num_classes = 4 + 1
+ignore_label = 255
+log_interval = 1000
+log_note = ''
+
diff --git a/configs/ufld/README.md b/configs/ufld/README.md
new file mode 100644
index 0000000..cdaa8d6
--- /dev/null
+++ b/configs/ufld/README.md
@@ -0,0 +1,16 @@
+# Ultra Fast Structure-aware Deep Lane Detection
+
+## Introduction
+
+```latex
+@inproceedings{qin2020ultra,
+  author    = {Zequn Qin and Huanyu Wang and Xi Li},
+  title     = {Ultra Fast Structure-aware Deep Lane Detection},
+  booktitle = {The European Conference on Computer Vision (ECCV)},
+  year      = {2020}
+}
+```
+
+## Model
+coming soon
diff --git a/configs/ufld/resnet18_culane.py b/configs/ufld/resnet18_culane.py
new file mode 100644
index 0000000..565a859
--- /dev/null
+++ b/configs/ufld/resnet18_culane.py
@@ -0,0 +1,83 @@
+net = dict(
+    type='Segmentor',
+)
+
+backbone = dict(
+    type='ResNetWrapper',
+    resnet='resnet18',
+    pretrained=True,
+    replace_stride_with_dilation=[False, False, False],
+    out_conv=False,
+)
+featuremap_out_channel = 512
+
+aggregator=None
+
+griding_num = 200
+num_classes = 4
+heads = [
+    dict(type='LaneCls',
+        dim = (griding_num + 1, 18, num_classes))
+]
+
+trainer = dict(
+    type='LaneCls'
+)
+
+evaluator = dict(
+    type='CULane',
+)
+
+optimizer = dict(
+  type = 'sgd',
+  lr = 0.015,
+  weight_decay = 1e-4,
+  momentum = 0.9
+)
+
+epochs = 50
+batch_size = 32
+total_iter = (88880 // batch_size + 1) * epochs
+import math
+scheduler = dict(
+    type = 'LambdaLR',
+    lr_lambda = lambda _iter : math.pow(1 - _iter/total_iter, 0.9)
+)
+
+
+img_norm = dict(
+    mean=[103.939, 116.779, 123.68],
+    std=[1., 1., 1.]
+)
+
+ori_img_h = 590
+ori_img_w = 1640
+img_h = 288
+img_w = 800
+
+dataset_path = './data/CULane'
+row_anchor = 'culane_row_anchor'
+dataset = dict(
+    train=dict(
+        type='LaneClsDataset',
+        img_path=dataset_path,
+        data_list='list/train_gt.txt'
+    ),
+    val=dict(
+        type='LaneClsDataset',
+        img_path=dataset_path,
+        data_list='list/test.txt'
+    ),
+    test=dict(
+        type='LaneClsDataset',
+        img_path=dataset_path,
+        data_list='list/test.txt'
+    )
+)
+
+workers = 12
+ignore_label = 255
+log_interval = 100
+eval_ep = epochs
+save_ep = epochs // 4
+y_pixel_gap = 20
diff --git a/configs/ufld/resnet18_tusimple.py b/configs/ufld/resnet18_tusimple.py
new file mode 100644
index 0000000..7ee2a9f
--- /dev/null
+++ b/configs/ufld/resnet18_tusimple.py
@@ -0,0 +1,97 @@
+net = dict(
+    type='Segmentor',
+)
+
+backbone = dict(
+    type='ResNetWrapper',
+    resnet='resnet18',
+    pretrained=True,
+    replace_stride_with_dilation=[False, False, False],
+    out_conv=False,
+)
+featuremap_out_channel = 512
+
+aggregator=None
+
+griding_num = 100
+num_classes = 6
+heads = [
+    dict(type='LaneCls',
+        dim = (griding_num + 1, 56, num_classes))
+]
+
+trainer = dict(
+    type='LaneCls'
+)
+
+evaluator = dict(
+    type='Tusimple',
+)
+
+optimizer = dict(
+  type = 'sgd',
+  lr = 0.025,
+  weight_decay = 1e-4,
+  momentum = 0.9
+)
+
+epochs = 100
+batch_size = 4
+total_iter = (3616 // batch_size + 1) * epochs
+import math
+scheduler = dict(
+    type = 'LambdaLR',
+    lr_lambda = lambda _iter : math.pow(1 - _iter/total_iter, 0.9)
+)
+
+
+img_norm = dict(
+    mean=[103.939, 116.779, 123.68],
+) + +ori_img_h = 720 +ori_img_w = 1280 +img_h = 288 +img_w = 800 + +dataset_path = './data/tusimple' +dataset = dict( + train=dict( + type='LaneClsDataset', + img_path=dataset_path, + data_list='seg_label/list/train_val_gt.txt' + ), + val=dict( + type='LaneClsDataset', + img_path=dataset_path, + data_list='seg_label/list/test_gt.txt' + ), + test=dict( + type='LaneClsDataset', + img_path=dataset_path, + data_list='seg_label/list/test_gt.txt' + ) +) + +workers = 12 +ignore_label = 255 +log_interval = 100 +eval_ep = 1 +save_ep = epochs +row_anchor='tusimple_row_anchor' +test_json_file='data/tusimple/test_label.json' +y_pixel_gap = 10 diff --git a/lanedet/datasets/__init__.py b/lanedet/datasets/__init__.py new file mode 100644 index 0000000..3219b44 --- /dev/null +++ b/lanedet/datasets/__init__.py @@ -0,0 +1,5 @@ +from .registry import build_dataset, build_dataloader + +from .lane_cls_dataset import LaneClsDataset +from .tusimple import TuSimple +from .culane import CULane diff --git a/lanedet/datasets/base_dataset.py b/lanedet/datasets/base_dataset.py new file mode 100644 index 0000000..4ef90eb --- /dev/null +++ b/lanedet/datasets/base_dataset.py @@ -0,0 +1,84 @@ +import os.path as osp +import os +import numpy as np +import cv2 +import torch +from torch.utils.data import Dataset +import torchvision +import lanedet.utils.transforms as tf +from .registry import DATASETS + + +@DATASETS.register_module +class BaseDataset(Dataset): + def __init__(self, img_path, data_list, list_path='list', cfg=None): + self.cfg = cfg + self.img_path = img_path + self.list_path = osp.join(img_path, list_path) + self.data_list = data_list + self.is_testing = ('test' in data_list) + + self.img_name_list = [] + self.full_img_path_list = [] + self.label_list = [] + self.exist_list = [] + + self.transform = self.transform_val() if self.is_testing else self.transform_train() + + self.init() + + def transform_train(self): + raise NotImplementedError() + + def transform_val(self): + val_transform = torchvision.transforms.Compose([ + tf.SampleResize((self.cfg.img_width, self.cfg.img_height)), + tf.GroupNormalize(mean=(self.cfg.img_norm['mean'], (0, )), std=( + self.cfg.img_norm['std'], (1, ))), + ]) + return val_transform + + def view(self, img, coords, file_path=None): + for coord in coords: + for x, y in coord: + if x <= 0 or y <= 0: + continue + x, y = int(x), int(y) + cv2.circle(img, (x, y), 4, (255, 0, 0), 2) + + if file_path is not None: + if not os.path.exists(osp.dirname(file_path)): + os.makedirs(osp.dirname(file_path)) + cv2.imwrite(file_path, img) + + + def init(self): + raise NotImplementedError() + + + def __len__(self): + return len(self.full_img_path_list) + + def __getitem__(self, idx): + img = cv2.imread(self.full_img_path_list[idx]).astype(np.float32) + label = cv2.imread(self.label_list[idx], cv2.IMREAD_UNCHANGED) + if len(label.shape) > 2: + label = label[:, :, 0] + label = label.squeeze() + + img = img[self.cfg.cut_height:, :, :] + label = label[self.cfg.cut_height:, :] + + exist = self.exist_list[idx] + + if self.transform: + img, label = self.transform((img, label)) + + img = torch.from_numpy(img).permute(2, 0, 1).contiguous().float() + label = torch.from_numpy(label).contiguous().long() + meta = {'full_img_path': self.full_img_path_list[idx], + 'img_name': self.img_name_list[idx]} + + data = {'img': img, 'label': label, + 'exist': exist, 'meta': meta} + return data diff --git a/lanedet/datasets/culane.py b/lanedet/datasets/culane.py new file mode 100644 index 0000000..a4aa532 --- /dev/null +++ 
b/lanedet/datasets/culane.py @@ -0,0 +1,81 @@ +import os +import os.path as osp +import numpy as np +import torchvision +import lanedet.utils.transforms as tf +from .base_dataset import BaseDataset +from .registry import DATASETS +import cv2 +import torch + + +@DATASETS.register_module +class CULane(BaseDataset): + def __init__(self, img_path, data_list, cfg=None): + super().__init__(img_path, data_list, cfg=cfg) + self.ori_imgh = 590 + self.ori_imgw = 1640 + + def init(self): + with open(osp.join(self.list_path, self.data_list)) as f: + for line in f: + line_split = line.strip().split(" ") + self.img_name_list.append(line_split[0]) + self.full_img_path_list.append(self.img_path + line_split[0]) + self.label_list.append(self.img_path + line_split[1]) + self.exist_list.append( + np.array([int(line_split[2]), int(line_split[3]), + int(line_split[4]), int(line_split[5])])) + + def transform_train(self): + train_transform = torchvision.transforms.Compose([ + tf.GroupRandomRotation(degree=(-2, 2)), + tf.GroupRandomHorizontalFlip(), + tf.SampleResize((self.cfg.img_width, self.cfg.img_height)), + tf.GroupNormalize(mean=(self.cfg.img_norm['mean'], (0, )), std=( + self.cfg.img_norm['std'], (1, ))), + ]) + return train_transform + + def get_lane(self, output): + segs, exists = output['seg'], output['exist'] + segs = segs.cpu().numpy() + exists = exists.cpu().numpy() + ret = [] + for seg, exist in zip(segs, exists): + lanes = self.probmap2lane(seg, exist) + ret.append(lanes) + return ret + + + def probmap2lane(self, probmaps, exists, pts=18): + coords = [] + probmaps = probmaps[1:, ...] + exists = exists > 0.5 + for probmap, exist in zip(probmaps, exists): + if exist == 0: + continue + probmap = cv2.blur(probmap, (9, 9), borderType=cv2.BORDER_REPLICATE) + thr = 0.3 + coordinate = np.zeros(pts) + cut_height = self.cfg.cut_height + for i in range(pts): + line = probmap[round( + self.cfg.img_height-i*20/(self.ori_imgh-cut_height)*self.cfg.img_height)-1] + + if np.max(line) > thr: + coordinate[i] = np.argmax(line)+1 + if np.sum(coordinate > 0) < 2: + continue + + img_coord = np.zeros((pts, 2)) + img_coord[:, :] = -1 + for idx, value in enumerate(coordinate): + if value > 0: + img_coord[idx][0] = round(value*self.ori_imgw/self.cfg.img_width-1) + img_coord[idx][1] = round(self.ori_imgh-idx*20-1) + + img_coord = img_coord.astype(int) + coords.append(img_coord) + + return coords diff --git a/lanedet/datasets/lane_cls_dataset.py b/lanedet/datasets/lane_cls_dataset.py new file mode 100644 index 0000000..929d9ac --- /dev/null +++ b/lanedet/datasets/lane_cls_dataset.py @@ -0,0 +1,227 @@ +""" +Reference: + https://github.com/cfzd/Ultra-Fast-Lane-Detection +""" + +import torch +from PIL import Image +import os +import os.path as osp +import pdb +import numpy as np +import cv2 +import torchvision.transforms as transforms +from lanedet.utils import mytransforms as mytransforms +from lanedet.utils.mytransforms import find_start_pos +import scipy +from .base_dataset import BaseDataset + + +from .registry import DATASETS + +tusimple_row_anchor = [ 64, 68, 72, 76, 80, 84, 88, 92, 96, 100, 104, 108, 112, + 116, 120, 124, 128, 132, 136, 140, 144, 148, 152, 156, 160, 164, + 168, 172, 176, 180, 184, 188, 192, 196, 200, 204, 208, 212, 216, + 220, 224, 228, 232, 236, 240, 244, 248, 252, 256, 260, 264, 268, + 272, 276, 280, 284] + +culane_row_anchor = [121, 131, 141, 150, 160, 170, 180, 189, 199, 209, 219, 228, 238, 248, 258, 267, 277, 287] + +def loader_func(path): + return Image.open(path) + +@DATASETS.register_module +class 
LaneClsDataset(torch.utils.data.Dataset): + def __init__(self, img_path, data_list, cfg=None): + super(LaneClsDataset, self).__init__() + self.cfg = cfg + self.img_transform = None + self.simu_transform = None + self.path = img_path + self.griding_num = cfg.griding_num + #self.use_aux = cfg.use_aux + self.ori_img_h = cfg.ori_img_h + self.ori_img_w = cfg.ori_img_w + + self.is_training = not ('test' in data_list) + + list_path = os.path.join(img_path, data_list) + with open(list_path, 'r') as f: + self.list = f.readlines() + + self.row_anchor = eval(cfg.row_anchor) + self.row_anchor.sort() + self.set_transform() + + def view(self, img, coords, file_path=None): + for coord in coords: + for x, y in coord: + if x <= 0 or y <= 0: + continue + x, y = int(x), int(y) + cv2.circle(img, (x, y), 4, (255, 0, 0), 2) + + if file_path is not None: + if not os.path.exists(osp.dirname(file_path)): + os.makedirs(osp.dirname(file_path)) + cv2.imwrite(file_path, img) + + def set_transform(self): + self.img_transform = transforms.Compose([ + transforms.Resize((288, 800)), + transforms.ToTensor(), + transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)), + ]) + if self.is_training: + self.simu_transform = mytransforms.Compose2([ + mytransforms.RandomRotate(6), + mytransforms.RandomUDoffsetLABEL(100), + mytransforms.RandomLROffsetLABEL(200) + ]) + + + def __getitem__(self, index): + l = self.list[index] + l_info = l.split() + if self.is_training: + img_name, label_name = l_info[0], l_info[1] + else: + img_name = l_info[0] + + img_path = self.path +'/'+ img_name + img = loader_func(img_path) + + cls_label = None + if self.is_training: + label_path = self.path + label_name + label = loader_func(label_path) + if self.simu_transform is not None: + img, label = self.simu_transform(img, label) + + lane_pts = self._get_index(label) + + w, h = img.size + cls_label = self._grid_pts(lane_pts, self.griding_num, w) + + if self.img_transform is not None: + img = self.img_transform(img) + + # if self.use_aux: + # assert self.segment_transform is not None + # seg_label = self.segment_transform(label) + meta = {'img_name': img_name, 'full_img_path': img_path} + + if self.is_training: + ret = {'img': img, 'cls_label': cls_label, 'meta': meta} + else: + ret = {'img': img, 'meta': meta} + + return ret + + + def __len__(self): + return len(self.list) + + def _grid_pts(self, pts, num_cols, w): + # pts : numlane,n,2 + num_lane, n, n2 = pts.shape + col_sample = np.linspace(0, w - 1, num_cols) + + assert n2 == 2 + to_pts = np.zeros((n, num_lane)) + tot_len = col_sample[1] - col_sample[0] + for i in range(num_lane): + pti = pts[i, :, 1] + to_pts[:, i] = np.asarray( + [int(pt // tot_len) if pt != -1 else num_cols for pt in pti]) + return to_pts.astype(int) + + def postprocess(self, out, localization_type='rel', flip_updown=True): + predictions = [] + griding_num = self.cfg.griding_num + for j in range(out.shape[0]): + out_j = out[j].data.cpu().numpy() + if flip_updown: + out_j = out_j[:, ::-1, :] + if localization_type == 'abs': + out_j = np.argmax(out_j, axis=0) + out_j[out_j == griding_num] = -1 + out_j = out_j + 1 + elif localization_type == 'rel': + prob = scipy.special.softmax(out_j[:-1, :, :], axis=0) + idx = np.arange(griding_num) + 1 + idx = idx.reshape(-1, 1, 1) + loc = np.sum(prob * idx, axis=0) + out_j = np.argmax(out_j, axis=0) + loc[out_j == griding_num] = 0 + out_j = loc + else: + raise NotImplementedError + predictions.append(out_j) + return predictions + + def get_lane(self, pred): + predictions = 
self.postprocess(pred['cls'])
+        ret = []
+        griding_num = self.cfg.griding_num
+        for out in predictions:
+            lanes = []
+            for i in range(out.shape[1]):
+                if sum(out[:, i] != 0) <= 2: continue
+                out_i = out[:, i]
+                coord = np.zeros((out.shape[0], 2))
+                coord.fill(-1)
+                for k in range(out.shape[0]):
+                    coord[k][0] = int((out_i[k]-0.5) * self.ori_img_w / (griding_num - 1)) if out[k, i] > 0 else -1
+                    coord[k][1] = int(self.ori_img_h-k*self.cfg.y_pixel_gap) - 10
+                lanes.append(coord)
+            ret.append(lanes)
+        return ret
+
+    def _get_index(self, label):
+        w, h = label.size
+
+        # scale the row anchors (defined for height 288) to the label height
+        scale_f = lambda x : int((x * 1.0 / 288) * h)
+        sample_tmp = list(map(scale_f, self.row_anchor))
+
+        num_classes = self.cfg.num_classes
+
+        all_idx = np.zeros((num_classes, len(sample_tmp), 2))
+        for i, r in enumerate(sample_tmp):
+            label_r = np.asarray(label)[int(round(r))]
+            for lane_idx in range(1, num_classes + 1):
+                pos = np.where(label_r == lane_idx)[0]
+                if len(pos) == 0:
+                    all_idx[lane_idx - 1, i, 0] = r
+                    all_idx[lane_idx - 1, i, 1] = -1
+                    continue
+                pos = np.mean(pos)
+                all_idx[lane_idx - 1, i, 0] = r
+                all_idx[lane_idx - 1, i, 1] = pos
+
+        all_idx_cp = all_idx.copy()
+        for i in range(num_classes):
+            if np.all(all_idx_cp[i, :, 1] == -1):
+                continue
+
+            valid = all_idx_cp[i, :, 1] != -1
+            valid_idx = all_idx_cp[i, valid, :]
+            if valid_idx[-1, 0] == all_idx_cp[0, -1, 0]:
+                continue
+            if len(valid_idx) < 6:
+                continue
+
+            valid_idx_half = valid_idx[len(valid_idx) // 2:, :]
+            p = np.polyfit(valid_idx_half[:, 0], valid_idx_half[:, 1], deg=1)
+            start_line = valid_idx_half[-1, 0]
+            pos = find_start_pos(all_idx_cp[i, :, 0], start_line) + 1
+
+            fitted = np.polyval(p, all_idx_cp[i, pos:, 0])
+            fitted = np.array([-1 if y < 0 or y > w - 1 else y for y in fitted])
+
+            assert np.all(all_idx_cp[i, pos:, 1] == -1)
+            all_idx_cp[i, pos:, 1] = fitted
+        if -1 in all_idx[:, :, 0]:
+            pdb.set_trace()
+        return all_idx_cp
diff --git a/lanedet/datasets/registry.py b/lanedet/datasets/registry.py
new file mode 100644
index 0000000..a104934
--- /dev/null
+++ b/lanedet/datasets/registry.py
@@ -0,0 +1,32 @@
+from lanedet.utils import Registry, build_from_cfg
+
+import torch
+from torch import nn
+
+DATASETS = Registry('datasets')
+
+def build(cfg, registry, default_args=None):
+    if isinstance(cfg, list):
+        modules = [
+            build_from_cfg(cfg_, registry, default_args) for cfg_ in cfg
+        ]
+        return nn.Sequential(*modules)
+    else:
+        return build_from_cfg(cfg, registry, default_args)
+
+
+def build_dataset(split_cfg, cfg):
+    return build(split_cfg, DATASETS, default_args=dict(cfg=cfg))
+
+def build_dataloader(split_cfg, cfg, is_train=True):
+    if is_train:
+        shuffle = True
+    else:
+        shuffle = False
+
+    dataset = build_dataset(split_cfg, cfg)
+
+    data_loader = torch.utils.data.DataLoader(
+        dataset, batch_size = cfg.batch_size, shuffle = shuffle,
+        num_workers = cfg.workers, pin_memory = False, drop_last = False)
+
+    return data_loader
diff --git a/lanedet/datasets/tusimple.py b/lanedet/datasets/tusimple.py
new file mode 100644
index 0000000..2e5bf35
--- /dev/null
+++ b/lanedet/datasets/tusimple.py
@@ -0,0 +1,158 @@
+import os.path as osp
+import numpy as np
+import cv2
+import torchvision
+import lanedet.utils.transforms as tf
+from .base_dataset import BaseDataset
+from .registry import DATASETS
+
+
+@DATASETS.register_module
+class TuSimple(BaseDataset):
+    def __init__(self, img_path, data_list, cfg=None):
+        super().__init__(img_path, data_list, 'seg_label/list', cfg)
+
+    def transform_train(self):
+        train_transform = torchvision.transforms.Compose([
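+            # geometric augmentations come first, then resize and Caffe-style normalization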
tf.GroupRandomRotation(), + tf.GroupRandomHorizontalFlip(), + tf.SampleResize((self.cfg.img_width, self.cfg.img_height)), + tf.GroupNormalize(mean=(self.cfg.img_norm['mean'], (0, )), std=( + self.cfg.img_norm['std'], (1, ))), + ]) + return train_transform + + + def init(self): + with open(osp.join(self.list_path, self.data_list)) as f: + for line in f: + line_split = line.strip().split(" ") + self.img_name_list.append(line_split[0]) + self.full_img_path_list.append(self.img_path + line_split[0]) + self.label_list.append(self.img_path + line_split[1]) + self.exist_list.append( + np.array([int(line_split[2]), int(line_split[3]), + int(line_split[4]), int(line_split[5]), + int(line_split[6]), int(line_split[7]) + ])) + + def fix_gap(self, coordinate): + if any(x > 0 for x in coordinate): + start = [i for i, x in enumerate(coordinate) if x > 0][0] + end = [i for i, x in reversed(list(enumerate(coordinate))) if x > 0][0] + lane = coordinate[start:end+1] + if any(x < 0 for x in lane): + gap_start = [i for i, x in enumerate( + lane[:-1]) if x > 0 and lane[i+1] < 0] + gap_end = [i+1 for i, + x in enumerate(lane[:-1]) if x < 0 and lane[i+1] > 0] + gap_id = [i for i, x in enumerate(lane) if x < 0] + if len(gap_start) == 0 or len(gap_end) == 0: + return coordinate + for id in gap_id: + for i in range(len(gap_start)): + if i >= len(gap_end): + return coordinate + if id > gap_start[i] and id < gap_end[i]: + gap_width = float(gap_end[i] - gap_start[i]) + lane[id] = int((id - gap_start[i]) / gap_width * lane[gap_end[i]] + ( + gap_end[i] - id) / gap_width * lane[gap_start[i]]) + if not all(x > 0 for x in lane): + print("Gaps still exist!") + coordinate[start:end+1] = lane + return coordinate + + def is_short(self, lane): + start = [i for i, x in enumerate(lane) if x > 0] + if not start: + return 1 + else: + return 0 + + def get_coord(self, prob_map, y_px_gap, pts, thresh, resize_shape=None): + """ + Arguments: + ---------- + prob_map: prob map for single lane, np array size (h, w) + resize_shape: reshape size target, (H, W) + + Return: + ---------- + coords: x coords bottom up every y_px_gap px, 0 for non-exist, in resized shape + """ + if resize_shape is None: + resize_shape = prob_map.shape + h, w = prob_map.shape + H, W = resize_shape + H -= self.cfg.cut_height + + coords = np.zeros(pts) + coords[:] = -1.0 + for i in range(pts): + y = int((H - 10 - i * y_px_gap) * h / H) + if y < 0: + break + line = prob_map[y, :] + id = np.argmax(line) + if line[id] > thresh: + coords[i] = int(id / w * W) + if (coords > 0).sum() < 2: + coords = np.zeros(pts) + self.fix_gap(coords) + + return coords + + def probmap2lane(self, seg_pred, exist, resize_shape=(720, 1280), smooth=True, y_px_gap=10, pts=56, thresh=0.6): + """ + Arguments: + ---------- + seg_pred: np.array size (5, h, w) + resize_shape: reshape size target, (H, W) + exist: list of existence, e.g. 
[0, 1, 1, 0]
+            smooth: whether to smooth the probability map or not
+            y_px_gap: y pixel gap for sampling
+            pts: how many points for one lane
+            thresh: probability threshold
+
+        Return:
+        ----------
+        coordinates: [x, y] list of lanes, e.g.: [ [[9, 569], [50, 549]], [[630, 569], [647, 549]] ]
+        """
+        if resize_shape is None:
+            resize_shape = seg_pred.shape[1:]  # seg_pred (5, h, w)
+        _, h, w = seg_pred.shape
+        H, W = resize_shape
+        coordinates = []
+
+        for i in range(self.cfg.num_classes - 1):
+            prob_map = seg_pred[i + 1]
+            if smooth:
+                prob_map = cv2.blur(prob_map, (9, 9), borderType=cv2.BORDER_REPLICATE)
+            coords = self.get_coord(prob_map, y_px_gap, pts, thresh, resize_shape)
+            if self.is_short(coords):
+                continue
+            coordinates.append(
+                [[coords[j], H - 10 - j * y_px_gap] if coords[j] > 0 else [-1, H - 10 - j * y_px_gap] for j in
+                 range(pts)])
+
+        if len(coordinates) == 0:
+            coords = np.zeros(pts)
+            coordinates.append(
+                [[coords[j], H - 10 - j * y_px_gap] if coords[j] > 0 else [-1, H - 10 - j * y_px_gap] for j in
+                 range(pts)])
+
+        return coordinates
+
+    def get_lane(self, output):
+        seg_pred, exist_pred = output['seg'], output['exist']
+        seg_pred = seg_pred.detach().cpu().numpy()
+        exist_pred = exist_pred.detach().cpu().numpy()
+        rets = []
+        for seg, exist in zip(seg_pred, exist_pred):
+            lanes = self.probmap2lane(seg, exist)
+            rets.append(lanes)
+        return rets
+
diff --git a/lanedet/models/__init__.py b/lanedet/models/__init__.py
new file mode 100644
index 0000000..22e74a8
--- /dev/null
+++ b/lanedet/models/__init__.py
@@ -0,0 +1,6 @@
+from .backbone import *
+from .aggregator import *
+from .heads import *
+from .net import *
+
+from .registry import build_backbone
diff --git a/lanedet/models/aggregator/__init__.py b/lanedet/models/aggregator/__init__.py
new file mode 100644
index 0000000..bfd4708
--- /dev/null
+++ b/lanedet/models/aggregator/__init__.py
@@ -0,0 +1,3 @@
+from .scnn import SCNN
+from .resa import RESA
+from .aspp import ASPP
diff --git a/lanedet/models/aggregator/aspp.py b/lanedet/models/aggregator/aspp.py
new file mode 100644
index 0000000..955b781
--- /dev/null
+++ b/lanedet/models/aggregator/aspp.py
@@ -0,0 +1,49 @@
+import torch
+from torch import nn
+import torch.nn.functional as F
+
+from lanedet.models.registry import AGGREGATOR
+
+class Atrous_module(nn.Module):
+    def __init__(self, inplanes, planes, rate):
+        super(Atrous_module, self).__init__()
+        self.atrous_convolution = nn.Conv2d(inplanes, planes, kernel_size=3,
+                                            stride=1, padding=rate, dilation=rate)
+        self.batch_norm = nn.BatchNorm2d(planes)
+
+    def forward(self, x):
+        x = self.atrous_convolution(x)
+        x = self.batch_norm(x)
+
+        return x
+
+
+@AGGREGATOR.register_module
+class ASPP(nn.Module):
+    def __init__(self, cfg):
+        super(ASPP, self).__init__()
+        rates = [1, 6, 12, 18]
+        in_channel = 128
+        self.aspp1 = Atrous_module(in_channel, 256, rate=rates[0])
+        self.aspp2 = Atrous_module(in_channel, 256, rate=rates[1])
+        self.aspp3 = Atrous_module(in_channel, 256, rate=rates[2])
+        self.aspp4 = Atrous_module(in_channel, 256, rate=rates[3])
+        self.image_pool = nn.Sequential(nn.AdaptiveMaxPool2d(1),
+                                        nn.Conv2d(in_channel, 256, kernel_size=1))
+        self.fc1 = nn.Sequential(nn.Conv2d(1280, 128, kernel_size=1),
+                                 nn.BatchNorm2d(128))
+
+    def forward(self, x):
+        x1 = self.aspp1(x)
+        x2 = self.aspp2(x)
+        x3 = self.aspp3(x)
+        x4 = self.aspp4(x)
+        x5 = self.image_pool(x)
+        x5 = F.interpolate(x5, size=x4.size()[2:], mode='nearest')
+
+        x = torch.cat((x1, x2,
x3, x4, x5), dim = 1) + + x = self.fc1(x) + + return x diff --git a/lanedet/models/aggregator/resa.py b/lanedet/models/aggregator/resa.py new file mode 100644 index 0000000..f13d4c8 --- /dev/null +++ b/lanedet/models/aggregator/resa.py @@ -0,0 +1,95 @@ +import torch +from torch import nn +import torch.nn.functional as F +from torch.hub import load_state_dict_from_url + +from lanedet.models.registry import AGGREGATOR +from .aspp import ASPP + +@AGGREGATOR.register_module +class RESA(nn.Module): + def __init__(self, + direction, + alpha, + iter, + conv_stride, + cfg): + super(RESA, self).__init__() + self.cfg = cfg + self.iter = iter + chan = cfg.featuremap_out_channel + fea_stride = cfg.featuremap_out_stride + self.height = cfg.img_height // fea_stride + self.width = cfg.img_width // fea_stride + self.alpha = alpha + + for i in range(self.iter): + conv_vert1 = nn.Conv2d( + chan, chan, (1, conv_stride), + padding=(0, conv_stride//2), groups=1, bias=False) + conv_vert2 = nn.Conv2d( + chan, chan, (1, conv_stride), + padding=(0, conv_stride//2), groups=1, bias=False) + + setattr(self, 'conv_d'+str(i), conv_vert1) + setattr(self, 'conv_u'+str(i), conv_vert2) + + conv_hori1 = nn.Conv2d( + chan, chan, (conv_stride, 1), + padding=(conv_stride//2, 0), groups=1, bias=False) + conv_hori2 = nn.Conv2d( + chan, chan, (conv_stride, 1), + padding=(conv_stride//2, 0), groups=1, bias=False) + + setattr(self, 'conv_r'+str(i), conv_hori1) + setattr(self, 'conv_l'+str(i), conv_hori2) + + idx_d = (torch.arange(self.height) + self.height // + 2**(self.iter - i)) % self.height + setattr(self, 'idx_d'+str(i), idx_d) + + idx_u = (torch.arange(self.height) - self.height // + 2**(self.iter - i)) % self.height + setattr(self, 'idx_u'+str(i), idx_u) + + idx_r = (torch.arange(self.width) + self.width // + 2**(self.iter - i)) % self.width + setattr(self, 'idx_r'+str(i), idx_r) + + idx_l = (torch.arange(self.width) - self.width // + 2**(self.iter - i)) % self.width + setattr(self, 'idx_l'+str(i), idx_l) + + def update(self, x): + height, width = x.size(2), x.size(3) + for i in range(self.iter): + idx_d = (torch.arange(height) + height // + 2**(self.iter - i)) % height + setattr(self, 'idx_d'+str(i), idx_d) + + idx_u = (torch.arange(height) - height // + 2**(self.iter - i)) % height + setattr(self, 'idx_u'+str(i), idx_u) + + idx_r = (torch.arange(width) + width // + 2**(self.iter - i)) % width + setattr(self, 'idx_r'+str(i), idx_r) + + idx_l = (torch.arange(width) - width // + 2**(self.iter - i)) % width + setattr(self, 'idx_l'+str(i), idx_l) + + def forward(self, x): + x = x.clone() + self.update(x) + + for direction in self.cfg.aggregator.direction: + for i in range(self.iter): + conv = getattr(self, 'conv_' + direction + str(i)) + idx = getattr(self, 'idx_' + direction + str(i)) + if direction in ['d', 'u']: + x.add_(self.alpha * F.relu(conv(x[..., idx, :]))) + else: + x.add_(self.alpha * F.relu(conv(x[..., idx]))) + + return x diff --git a/lanedet/models/aggregator/scnn.py b/lanedet/models/aggregator/scnn.py new file mode 100644 index 0000000..8ddf375 --- /dev/null +++ b/lanedet/models/aggregator/scnn.py @@ -0,0 +1,31 @@ +import torch +from torch import nn +import torch.nn.functional as F +from torch.hub import load_state_dict_from_url + +from lanedet.models.registry import AGGREGATOR + + +@AGGREGATOR.register_module +class SCNN(nn.Module): + def __init__(self, cfg=None): + super(SCNN, self).__init__() + self.conv_d = nn.Conv2d(128, 128, (1, 9), padding=(0, 4), bias=False) + self.conv_u = nn.Conv2d(128, 128, (1, 9), 
padding=(0, 4), bias=False) + self.conv_r = nn.Conv2d(128, 128, (9, 1), padding=(4, 0), bias=False) + self.conv_l = nn.Conv2d(128, 128, (9, 1), padding=(4, 0), bias=False) + + def forward(self, x): + x = x.clone() + for i in range(1, x.shape[2]): + x[..., i:i+1, :].add_(F.relu(self.conv_d(x[..., i-1:i, :]))) + + for i in range(x.shape[2] - 2, 0, -1): + x[..., i:i+1, :].add_(F.relu(self.conv_u(x[..., i+1:i+2, :]))) + + for i in range(1, x.shape[3]): + x[..., i:i+1].add_(F.relu(self.conv_r(x[..., i-1:i]))) + + for i in range(x.shape[3] - 2, 0, -1): + x[..., i:i+1].add_(F.relu(self.conv_l(x[..., i+1:i+2]))) + return x diff --git a/lanedet/models/backbone/__init__.py b/lanedet/models/backbone/__init__.py new file mode 100644 index 0000000..b53e49d --- /dev/null +++ b/lanedet/models/backbone/__init__.py @@ -0,0 +1,3 @@ +from .resnet import ResNet +from .vgg import VGG +from .erfnet import ERFNet diff --git a/lanedet/models/backbone/erfnet.py b/lanedet/models/backbone/erfnet.py new file mode 100644 index 0000000..5a87551 --- /dev/null +++ b/lanedet/models/backbone/erfnet.py @@ -0,0 +1,167 @@ +# ERFNET full network definition for Pytorch +# Sept 2017 +# Eduardo Romera +####################### + +import torch +import torch.nn as nn +import torch.nn.init as init +import torch.nn.functional as F +from lanedet.models.registry import BACKBONE + + +class DownsamplerBlock(nn.Module): + def __init__(self, ninput, noutput): + super().__init__() + + self.conv = nn.Conv2d(ninput, noutput - ninput, (3, 3), stride=2, padding=1, bias=True) + self.pool = nn.MaxPool2d(2, stride=2) + self.bn = nn.BatchNorm2d(noutput, eps=1e-3) + + def forward(self, input): + output = torch.cat([self.conv(input), self.pool(input)], 1) + output = self.bn(output) + return F.relu(output) + + +class non_bottleneck_1d(nn.Module): + def __init__(self, chann, dropprob, dilated, use_dcn=False): + super().__init__() + + if not use_dcn: + self.conv3x1_1 = nn.Conv2d(chann, chann, (3, 1), stride=1, padding=(1, 0), bias=True) + + self.conv1x3_1 = nn.Conv2d(chann, chann, (1, 3), stride=1, padding=(0, 1), bias=True) + else: + self.conv3x1_1 = DCN(chann, chann, (3, 1), stride=1, padding=(1, 0)) + + self.conv1x3_1 = DCN(chann, chann, (1, 3), stride=1, padding=(0, 1)) + + self.bn1 = nn.BatchNorm2d(chann, eps=1e-03) + + self.conv3x1_2 = nn.Conv2d(chann, chann, (3, 1), stride=1, padding=(1 * dilated, 0), bias=True, + dilation=(dilated, 1)) + + self.conv1x3_2 = nn.Conv2d(chann, chann, (1, 3), stride=1, padding=(0, 1 * dilated), bias=True, + dilation=(1, dilated)) + + self.bn2 = nn.BatchNorm2d(chann, eps=1e-03) + + self.dropout = nn.Dropout2d(dropprob) + + def forward(self, input): + output = self.conv3x1_1(input) + output = F.relu(output) + output = self.conv1x3_1(output) + output = self.bn1(output) + output = F.relu(output) + + output = self.conv3x1_2(output) + output = F.relu(output) + output = self.conv1x3_2(output) + output = self.bn2(output) + + if (self.dropout.p != 0): + output = self.dropout(output) + + return F.relu(output + input) # +input = identity (residual connection) + + +class Encoder(nn.Module): + def __init__(self, num_classes): + super().__init__() + self.initial_block = DownsamplerBlock(3, 16) + + self.layers = nn.ModuleList() + + self.layers.append(DownsamplerBlock(16, 64)) + + for x in range(0, 5): # 5 times + self.layers.append(non_bottleneck_1d(64, 0.1, 1)) + + self.layers.append(DownsamplerBlock(64, 128)) + + for x in range(0, 2): # 2 times + self.layers.append(non_bottleneck_1d(128, 0.1, 2)) + 
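+            # the encoder then stacks dilated blocks (rates 2, 4, 8, 16, repeated twice)
+            # to grow the receptive field without further downsampling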
self.layers.append(non_bottleneck_1d(128, 0.1, 4)) + self.layers.append(non_bottleneck_1d(128, 0.1, 8)) + self.layers.append(non_bottleneck_1d(128, 0.1, 16)) + + # only for encoder mode: + self.output_conv = nn.Conv2d(128, num_classes, 1, stride=1, padding=0, bias=True) + + def forward(self, input, predict=False): + output = self.initial_block(input) + + for layer in self.layers: + output = layer(output) + + if predict: + output = self.output_conv(output) + + return output + + +class UpsamplerBlock(nn.Module): + def __init__(self, ninput, noutput): + super().__init__() + self.conv = nn.ConvTranspose2d(ninput, noutput, 3, stride=2, padding=1, output_padding=1, bias=True) + self.bn = nn.BatchNorm2d(noutput, eps=1e-3, track_running_stats=True) + + def forward(self, input): + output = self.conv(input) + output = self.bn(output) + return F.relu(output) + + +class Lane_exist(nn.Module): + def __init__(self, cfg, num_output): + super().__init__() + + self.layers = nn.ModuleList() + + self.layers.append(nn.Conv2d(128, 32, (3, 3), stride=1, padding=(4, 4), bias=False, dilation=(4, 4))) + self.layers.append(nn.BatchNorm2d(32, eps=1e-03)) + + self.layers_final = nn.ModuleList() + + self.layers_final.append(nn.Dropout2d(0.1)) + self.layers_final.append(nn.Conv2d(32, 5, (1, 1), stride=1, padding=(0, 0), bias=True)) + + self.maxpool = nn.MaxPool2d(2, stride=2) + self.linear_dim = int(cfg.img_width / 16 * cfg.img_height / 16 * 5) + self.linear1 = nn.Linear(self.linear_dim, 128) + self.linear2 = nn.Linear(128, num_output) + + def forward(self, input): + output = input + + for layer in self.layers: + output = layer(output) + + output = F.relu(output) + + for layer in self.layers_final: + output = layer(output) + + output = F.softmax(output, dim=1) + output = self.maxpool(output) + output = output.view(-1, self.linear_dim) + output = self.linear1(output) + output = F.relu(output) + output = self.linear2(output) + output = F.sigmoid(output) + + return output + + +@BACKBONE.register_module +class ERFNet(nn.Module): + def __init__(self, cfg): # use encoder to pass pretrained encoder + super().__init__() + + self.encoder = Encoder(cfg.num_classes) + + def forward(self, input): + output = self.encoder(input) # predict=False by default + return output diff --git a/lanedet/models/backbone/resnet.py b/lanedet/models/backbone/resnet.py new file mode 100644 index 0000000..cce2c76 --- /dev/null +++ b/lanedet/models/backbone/resnet.py @@ -0,0 +1,385 @@ +import torch +from torch import nn +import torch.nn.functional as F +from torch.hub import load_state_dict_from_url + +from lanedet.models.registry import BACKBONE + +model_urls = { + 'resnet18': 'https://download.pytorch.org/models/resnet18-5c106cde.pth', + 'resnet34': 'https://download.pytorch.org/models/resnet34-333f7ec4.pth', + 'resnet50': 'https://download.pytorch.org/models/resnet50-19c8e357.pth', + 'resnet101': 'https://download.pytorch.org/models/resnet101-5d3b4d8f.pth', + 'resnet152': 'https://download.pytorch.org/models/resnet152-b121ed2d.pth', + 'resnext50_32x4d': 'https://download.pytorch.org/models/resnext50_32x4d-7cdf4587.pth', + 'resnext101_32x8d': 'https://download.pytorch.org/models/resnext101_32x8d-8ba56ff5.pth', + 'wide_resnet50_2': 'https://download.pytorch.org/models/wide_resnet50_2-95faca4d.pth', + 'wide_resnet101_2': 'https://download.pytorch.org/models/wide_resnet101_2-32ee1156.pth', +} + + +def conv3x3(in_planes, out_planes, stride=1, groups=1, dilation=1): + """3x3 convolution with padding""" + return nn.Conv2d(in_planes, out_planes, 
kernel_size=3, stride=stride, + padding=dilation, groups=groups, bias=False, dilation=dilation) + + +def conv1x1(in_planes, out_planes, stride=1): + """1x1 convolution""" + return nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False) + + +class BasicBlock(nn.Module): + expansion = 1 + + def __init__(self, inplanes, planes, stride=1, downsample=None, groups=1, + base_width=64, dilation=1, norm_layer=None): + super(BasicBlock, self).__init__() + if norm_layer is None: + norm_layer = nn.BatchNorm2d + if groups != 1 or base_width != 64: + raise ValueError( + 'BasicBlock only supports groups=1 and base_width=64') + # if dilation > 1: + # raise NotImplementedError( + # "Dilation > 1 not supported in BasicBlock") + # Both self.conv1 and self.downsample layers downsample the input when stride != 1 + self.conv1 = conv3x3(inplanes, planes, stride, dilation=dilation) + self.bn1 = norm_layer(planes) + self.relu = nn.ReLU(inplace=True) + self.conv2 = conv3x3(planes, planes, dilation=dilation) + self.bn2 = norm_layer(planes) + self.downsample = downsample + self.stride = stride + + def forward(self, x): + identity = x + + out = self.conv1(x) + out = self.bn1(out) + out = self.relu(out) + + out = self.conv2(out) + out = self.bn2(out) + + if self.downsample is not None: + identity = self.downsample(x) + + out += identity + out = self.relu(out) + + return out + + +class Bottleneck(nn.Module): + expansion = 4 + + def __init__(self, inplanes, planes, stride=1, downsample=None, groups=1, + base_width=64, dilation=1, norm_layer=None): + super(Bottleneck, self).__init__() + if norm_layer is None: + norm_layer = nn.BatchNorm2d + width = int(planes * (base_width / 64.)) * groups + # Both self.conv2 and self.downsample layers downsample the input when stride != 1 + self.conv1 = conv1x1(inplanes, width) + self.bn1 = norm_layer(width) + self.conv2 = conv3x3(width, width, stride, groups, dilation) + self.bn2 = norm_layer(width) + self.conv3 = conv1x1(width, planes * self.expansion) + self.bn3 = norm_layer(planes * self.expansion) + self.relu = nn.ReLU(inplace=True) + self.downsample = downsample + self.stride = stride + + def forward(self, x): + identity = x + + out = self.conv1(x) + out = self.bn1(out) + out = self.relu(out) + + out = self.conv2(out) + out = self.bn2(out) + out = self.relu(out) + + out = self.conv3(out) + out = self.bn3(out) + + if self.downsample is not None: + identity = self.downsample(x) + + out += identity + out = self.relu(out) + + return out + + +@BACKBONE.register_module +class ResNetWrapper(nn.Module): + + def __init__(self, + resnet = 'resnet18', + pretrained=True, + replace_stride_with_dilation=[False, False, False], + out_conv=False, + fea_stride=8, + out_channel=128, + in_channels=[64, 128, 256, 512], + cfg=None): + super(ResNetWrapper, self).__init__() + self.cfg = cfg + self.in_channels = in_channels + + self.model = eval(cfg.backbone.resnet)( + pretrained=cfg.backbone.pretrained, + replace_stride_with_dilation=cfg.backbone.replace_stride_with_dilation, in_channels=self.in_channels) + self.out = None + if cfg.backbone.out_conv: + out_channel = 512 + for chan in reversed(self.in_channels): + if chan < 0: continue + out_channel = chan + break + self.out = conv1x1( + out_channel * self.model.expansion, cfg.featuremap_out_channel) + + def forward(self, x): + x = self.model(x) + if self.out: + x = self.out(x) + return x + + +class ResNet(nn.Module): + + def __init__(self, block, layers, zero_init_residual=False, + groups=1, width_per_group=64, 
replace_stride_with_dilation=None, + norm_layer=None, in_channels=None): + super(ResNet, self).__init__() + if norm_layer is None: + norm_layer = nn.BatchNorm2d + self._norm_layer = norm_layer + + self.inplanes = 64 + self.dilation = 1 + if replace_stride_with_dilation is None: + # each element in the tuple indicates if we should replace + # the 2x2 stride with a dilated convolution instead + replace_stride_with_dilation = [False, False, False] + if len(replace_stride_with_dilation) != 3: + raise ValueError("replace_stride_with_dilation should be None " + "or a 3-element tuple, got {}".format(replace_stride_with_dilation)) + self.groups = groups + self.base_width = width_per_group + self.conv1 = nn.Conv2d(3, self.inplanes, kernel_size=7, stride=2, padding=3, + bias=False) + self.bn1 = norm_layer(self.inplanes) + self.relu = nn.ReLU(inplace=True) + self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) + self.in_channels = in_channels + self.layer1 = self._make_layer(block, in_channels[0], layers[0]) + self.layer2 = self._make_layer(block, in_channels[1], layers[1], stride=2, + dilate=replace_stride_with_dilation[0]) + self.layer3 = self._make_layer(block, in_channels[2], layers[2], stride=2, + dilate=replace_stride_with_dilation[1]) + if in_channels[3] > 0: + self.layer4 = self._make_layer(block, in_channels[3], layers[3], stride=2, + dilate=replace_stride_with_dilation[2]) + self.expansion = block.expansion + + # self.avgpool = nn.AdaptiveAvgPool2d((1, 1)) + # self.fc = nn.Linear(512 * block.expansion, num_classes) + + for m in self.modules(): + if isinstance(m, nn.Conv2d): + nn.init.kaiming_normal_( + m.weight, mode='fan_out', nonlinearity='relu') + elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)): + nn.init.constant_(m.weight, 1) + nn.init.constant_(m.bias, 0) + + # Zero-initialize the last BN in each residual branch, + # so that the residual branch starts with zeros, and each residual block behaves like an identity. 
+        # This improves the model by 0.2~0.3% according to https://arxiv.org/abs/1706.02677
+        if zero_init_residual:
+            for m in self.modules():
+                if isinstance(m, Bottleneck):
+                    nn.init.constant_(m.bn3.weight, 0)
+                elif isinstance(m, BasicBlock):
+                    nn.init.constant_(m.bn2.weight, 0)
+
+    def _make_layer(self, block, planes, blocks, stride=1, dilate=False):
+        norm_layer = self._norm_layer
+        downsample = None
+        previous_dilation = self.dilation
+        if dilate:
+            self.dilation *= stride
+            stride = 1
+        if stride != 1 or self.inplanes != planes * block.expansion:
+            downsample = nn.Sequential(
+                conv1x1(self.inplanes, planes * block.expansion, stride),
+                norm_layer(planes * block.expansion),
+            )
+
+        layers = []
+        layers.append(block(self.inplanes, planes, stride, downsample, self.groups,
+                            self.base_width, previous_dilation, norm_layer))
+        self.inplanes = planes * block.expansion
+        for _ in range(1, blocks):
+            layers.append(block(self.inplanes, planes, groups=self.groups,
+                                base_width=self.base_width, dilation=self.dilation,
+                                norm_layer=norm_layer))
+
+        return nn.Sequential(*layers)
+
+    def forward(self, x):
+        x = self.conv1(x)
+        x = self.bn1(x)
+        x = self.relu(x)
+        x = self.maxpool(x)
+
+        x = self.layer1(x)
+        x = self.layer2(x)
+        x = self.layer3(x)
+        if self.in_channels[3] > 0:
+            x = self.layer4(x)
+
+        # x = self.avgpool(x)
+        # x = torch.flatten(x, 1)
+        # x = self.fc(x)
+
+        return x
+
+
+def _resnet(arch, block, layers, pretrained, progress, **kwargs):
+    model = ResNet(block, layers, **kwargs)
+    if pretrained:
+        print('pretrained model: ', model_urls[arch])
+        # state_dict = torch.load(model_urls[arch])['net']
+        state_dict = load_state_dict_from_url(model_urls[arch])
+        model.load_state_dict(state_dict, strict=False)
+    return model
+
+
+def resnet18(pretrained=False, progress=True, **kwargs):
+    r"""ResNet-18 model from
+    `"Deep Residual Learning for Image Recognition" <https://arxiv.org/pdf/1512.03385.pdf>`_
+
+    Args:
+        pretrained (bool): If True, returns a model pre-trained on ImageNet
+        progress (bool): If True, displays a progress bar of the download to stderr
+    """
+    return _resnet('resnet18', BasicBlock, [2, 2, 2, 2], pretrained, progress,
+                   **kwargs)
+
+
+def resnet34(pretrained=False, progress=True, **kwargs):
+    r"""ResNet-34 model from
+    `"Deep Residual Learning for Image Recognition" <https://arxiv.org/pdf/1512.03385.pdf>`_
+
+    Args:
+        pretrained (bool): If True, returns a model pre-trained on ImageNet
+        progress (bool): If True, displays a progress bar of the download to stderr
+    """
+    return _resnet('resnet34', BasicBlock, [3, 4, 6, 3], pretrained, progress,
+                   **kwargs)
+
+
+def resnet50(pretrained=False, progress=True, **kwargs):
+    r"""ResNet-50 model from
+    `"Deep Residual Learning for Image Recognition" <https://arxiv.org/pdf/1512.03385.pdf>`_
+
+    Args:
+        pretrained (bool): If True, returns a model pre-trained on ImageNet
+        progress (bool): If True, displays a progress bar of the download to stderr
+    """
+    return _resnet('resnet50', Bottleneck, [3, 4, 6, 3], pretrained, progress,
+                   **kwargs)
+
+
+def resnet101(pretrained=False, progress=True, **kwargs):
+    r"""ResNet-101 model from
+    `"Deep Residual Learning for Image Recognition" <https://arxiv.org/pdf/1512.03385.pdf>`_
+
+    Args:
+        pretrained (bool): If True, returns a model pre-trained on ImageNet
+        progress (bool): If True, displays a progress bar of the download to stderr
+    """
+    return _resnet('resnet101', Bottleneck, [3, 4, 23, 3], pretrained, progress,
+                   **kwargs)
+
+
+def resnet152(pretrained=False, progress=True, **kwargs):
+    r"""ResNet-152 model from
+    `"Deep Residual Learning for Image Recognition" <https://arxiv.org/pdf/1512.03385.pdf>`_
+
+    Args:
+        pretrained (bool): If True, returns a model pre-trained on ImageNet
+        progress (bool): If True, displays a progress bar of the download to stderr
+    """
+    return _resnet('resnet152', Bottleneck, [3, 8, 36, 3], pretrained, progress,
+                   **kwargs)
+
+
+def resnext50_32x4d(pretrained=False, progress=True, **kwargs):
+    r"""ResNeXt-50 32x4d model from
+    `"Aggregated Residual Transformations for Deep Neural Networks" <https://arxiv.org/pdf/1611.05431.pdf>`_
+
+    Args:
+        pretrained (bool): If True, returns a model pre-trained on ImageNet
+        progress (bool): If True, displays a progress bar of the download to stderr
+    """
+    kwargs['groups'] = 32
+    kwargs['width_per_group'] = 4
+    return _resnet('resnext50_32x4d', Bottleneck, [3, 4, 6, 3],
+                   pretrained, progress, **kwargs)
+
+
+def resnext101_32x8d(pretrained=False, progress=True, **kwargs):
+    r"""ResNeXt-101 32x8d model from
+    `"Aggregated Residual Transformations for Deep Neural Networks" <https://arxiv.org/pdf/1611.05431.pdf>`_
+
+    Args:
+        pretrained (bool): If True, returns a model pre-trained on ImageNet
+        progress (bool): If True, displays a progress bar of the download to stderr
+    """
+    kwargs['groups'] = 32
+    kwargs['width_per_group'] = 8
+    return _resnet('resnext101_32x8d', Bottleneck, [3, 4, 23, 3],
+                   pretrained, progress, **kwargs)
+
+
+def wide_resnet50_2(pretrained=False, progress=True, **kwargs):
+    r"""Wide ResNet-50-2 model from
+    `"Wide Residual Networks" <https://arxiv.org/pdf/1605.07146.pdf>`_
+
+    The model is the same as ResNet except for the bottleneck number of channels
+    which is twice larger in every block. The number of channels in outer 1x1
+    convolutions is the same, e.g. the last block in ResNet-50 has 2048-512-2048
+    channels, and in Wide ResNet-50-2 has 2048-1024-2048.
+
+    Args:
+        pretrained (bool): If True, returns a model pre-trained on ImageNet
+        progress (bool): If True, displays a progress bar of the download to stderr
+    """
+    kwargs['width_per_group'] = 64 * 2
+    return _resnet('wide_resnet50_2', Bottleneck, [3, 4, 6, 3],
+                   pretrained, progress, **kwargs)
+
+
+def wide_resnet101_2(pretrained=False, progress=True, **kwargs):
+    r"""Wide ResNet-101-2 model from
+    `"Wide Residual Networks" <https://arxiv.org/pdf/1605.07146.pdf>`_
+
+    The model is the same as ResNet except for the bottleneck number of channels
+    which is twice larger in every block. The number of channels in outer 1x1
+    convolutions is the same, e.g. the last block in ResNet-50 has 2048-512-2048
+    channels, and in Wide ResNet-50-2 has 2048-1024-2048.
+
+    Args:
+        pretrained (bool): If True, returns a model pre-trained on ImageNet
+        progress (bool): If True, displays a progress bar of the download to stderr
+    """
+    kwargs['width_per_group'] = 64 * 2
+    return _resnet('wide_resnet101_2', Bottleneck, [3, 4, 23, 3],
+                   pretrained, progress, **kwargs)
diff --git a/lanedet/models/backbone/vgg.py b/lanedet/models/backbone/vgg.py
new file mode 100644
index 0000000..80e33f8
--- /dev/null
+++ b/lanedet/models/backbone/vgg.py
@@ -0,0 +1,110 @@
+import torch
+from torch import nn
+import torch.nn.functional as F
+from torch.hub import load_state_dict_from_url
+
+from lanedet.models.registry import BACKBONE
+
+# kept for reference; pretrained VGG weights are not loaded in this backbone
+model_urls = {
+    'vgg11': 'https://download.pytorch.org/models/vgg11-bbd30ac9.pth',
+    'vgg13': 'https://download.pytorch.org/models/vgg13-c768596a.pth',
+    'vgg16': 'https://download.pytorch.org/models/vgg16-397923af.pth',
+    'vgg19': 'https://download.pytorch.org/models/vgg19-dcbb9e9d.pth',
+    'vgg11_bn': 'https://download.pytorch.org/models/vgg11_bn-6002323d.pth',
+    'vgg13_bn': 'https://download.pytorch.org/models/vgg13_bn-abd245e5.pth',
+    'vgg16_bn': 'https://download.pytorch.org/models/vgg16_bn-6c64b313.pth',
+    'vgg19_bn': 'https://download.pytorch.org/models/vgg19_bn-c79401a0.pth',
+}
+
+
+@BACKBONE.register_module
+class VGG(nn.Module):
+    def __init__(self, cfg):
+        super(VGG, self).__init__()
+
+        self.conv1_1 = nn.Conv2d(3, 64, 3, padding=1, bias=False)
+        self.bn1_1 = nn.BatchNorm2d(64)
+        self.conv1_2 = nn.Conv2d(64, 64, 3, padding=1, bias=False)
+        self.bn1_2 = nn.BatchNorm2d(64)
+
+        self.conv2_1 = nn.Conv2d(64, 128, 3, padding=1, bias=False)
+        self.bn2_1 = nn.BatchNorm2d(128)
+        self.conv2_2 = nn.Conv2d(128, 128, 3, padding=1, bias=False)
+        self.bn2_2 = nn.BatchNorm2d(128)
+
+        self.conv3_1 = nn.Conv2d(128, 256, 3, padding=1, bias=False)
+        self.bn3_1 = nn.BatchNorm2d(256)
+        self.conv3_2 = nn.Conv2d(256, 256, 3, padding=1, bias=False)
+        self.bn3_2 = nn.BatchNorm2d(256)
+        self.conv3_3 = nn.Conv2d(256, 256, 3, padding=1, bias=False)
+        self.bn3_3 = nn.BatchNorm2d(256)
+
+        self.conv4_1 = nn.Conv2d(256, 512, 3, padding=1, bias=False)
+        self.bn4_1 = nn.BatchNorm2d(512)
+        self.conv4_2 = nn.Conv2d(512, 512, 3, padding=1, bias=False)
+        self.bn4_2 = nn.BatchNorm2d(512)
+        self.conv4_3 = nn.Conv2d(512, 512, 3, padding=1, bias=False)
+        self.bn4_3 = nn.BatchNorm2d(512)
+
+        self.conv5_1 = nn.Conv2d(
+            512, 512, 3, padding=2, dilation=2, bias=False)
+        self.bn5_1 = nn.BatchNorm2d(512)
+        self.conv5_2 = nn.Conv2d(
+            512, 512, 3, padding=2, dilation=2, bias=False)
+        self.bn5_2 = nn.BatchNorm2d(512)
+        self.conv5_3 = nn.Conv2d(
+            512, 512, 3, padding=2, dilation=2, bias=False)
+        self.bn5_3 = nn.BatchNorm2d(512)
+
+        self.conv6 = nn.Conv2d(512, 1024, 3, padding=4, dilation=4, bias=False)
+        self.bn6 = nn.BatchNorm2d(1024)
+        self.conv7 = nn.Conv2d(1024, 128, 1, bias=False)
+        self.bn7 = nn.BatchNorm2d(128)
+        self._initialize_weights()
+
+    def _initialize_weights(self) -> None:
+        for m in self.modules():
+            if isinstance(m, nn.Conv2d):
+                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
+                if m.bias is not None:
+                    nn.init.constant_(m.bias, 0)
+            elif isinstance(m, nn.BatchNorm2d):
+                nn.init.constant_(m.weight, 1)
+                nn.init.constant_(m.bias, 0)
+            elif isinstance(m, nn.Linear):
+                nn.init.normal_(m.weight, 0, 0.01)
+                nn.init.constant_(m.bias, 0)
+
+    def forward(self, x):
+        """
+        Return features at 1/8 of the input resolution.
+        """
+        x = F.relu(self.bn1_1(self.conv1_1(x)))
+        x = F.relu(self.bn1_2(self.conv1_2(x)))
+        x = F.max_pool2d(x, 2, stride=2, padding=0)
+
+        x = 
F.relu(self.bn2_1(self.conv2_1(x))) + x = F.relu(self.bn2_2(self.conv2_2(x))) + x = F.max_pool2d(x, 2, stride=2, padding=0) + + x = F.relu(self.bn3_1(self.conv3_1(x))) + x = F.relu(self.bn3_2(self.conv3_2(x))) + x = F.relu(self.bn3_3(self.conv3_3(x))) + x = F.max_pool2d(x, 2, stride=2, padding=0) + + x = F.relu(self.bn4_1(self.conv4_1(x))) + x = F.relu(self.bn4_2(self.conv4_2(x))) + x = F.relu(self.bn4_3(self.conv4_3(x))) + + x = F.relu(self.bn5_1(self.conv5_1(x))) + x = F.relu(self.bn5_2(self.conv5_2(x))) + x = F.relu(self.bn5_3(self.conv5_3(x))) + + x = F.relu(self.bn6(self.conv6(x))) + x = F.relu(self.bn7(self.conv7(x))) + + return x diff --git a/lanedet/models/heads/__init__.py b/lanedet/models/heads/__init__.py new file mode 100644 index 0000000..30fd1ff --- /dev/null +++ b/lanedet/models/heads/__init__.py @@ -0,0 +1,4 @@ +from .exist_head import ExistHead +from .lane_cls import LaneCls +from .busd import BUSD +from .plain_decoder import PlainDecoder diff --git a/lanedet/models/heads/busd.py b/lanedet/models/heads/busd.py new file mode 100644 index 0000000..1391ff5 --- /dev/null +++ b/lanedet/models/heads/busd.py @@ -0,0 +1,126 @@ +import torch +from torch import nn +import torch.nn.functional as F +from torch.hub import load_state_dict_from_url + +from ..registry import HEADS + +def conv3x3(in_planes, out_planes, stride=1, groups=1, dilation=1): + """3x3 convolution with padding""" + return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, + padding=dilation, groups=groups, bias=False, dilation=dilation) + + +def conv1x1(in_planes, out_planes, stride=1): + """1x1 convolution""" + return nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False) + + +class non_bottleneck_1d(nn.Module): + def __init__(self, chann, dropprob, dilated): + super().__init__() + + self.conv3x1_1 = nn.Conv2d( + chann, chann, (3, 1), stride=1, padding=(1, 0), bias=True) + + self.conv1x3_1 = nn.Conv2d( + chann, chann, (1, 3), stride=1, padding=(0, 1), bias=True) + + self.bn1 = nn.BatchNorm2d(chann, eps=1e-03) + + self.conv3x1_2 = nn.Conv2d(chann, chann, (3, 1), stride=1, padding=(1 * dilated, 0), bias=True, + dilation=(dilated, 1)) + + self.conv1x3_2 = nn.Conv2d(chann, chann, (1, 3), stride=1, padding=(0, 1 * dilated), bias=True, + dilation=(1, dilated)) + + self.bn2 = nn.BatchNorm2d(chann, eps=1e-03) + + self.dropout = nn.Dropout2d(dropprob) + + def forward(self, input): + output = self.conv3x1_1(input) + output = F.relu(output) + output = self.conv1x3_1(output) + output = self.bn1(output) + output = F.relu(output) + + output = self.conv3x1_2(output) + output = F.relu(output) + output = self.conv1x3_2(output) + output = self.bn2(output) + + if (self.dropout.p != 0): + output = self.dropout(output) + + # +input = identity (residual connection) + return F.relu(output + input) + + +class UpsamplerBlock(nn.Module): + def __init__(self, ninput, noutput, up_width, up_height): + super().__init__() + + self.conv = nn.ConvTranspose2d( + ninput, noutput, 3, stride=2, padding=1, output_padding=1, bias=True) + + self.bn = nn.BatchNorm2d(noutput, eps=1e-3, track_running_stats=True) + + self.follows = nn.ModuleList() + self.follows.append(non_bottleneck_1d(noutput, 0, 1)) + self.follows.append(non_bottleneck_1d(noutput, 0, 1)) + + # interpolate + self.up_width = up_width + self.up_height = up_height + self.interpolate_conv = conv1x1(ninput, noutput) + self.interpolate_bn = nn.BatchNorm2d( + noutput, eps=1e-3, track_running_stats=True) + + def forward(self, input): + output = self.conv(input) + 
output = self.bn(output) + out = F.relu(output) + for follow in self.follows: + out = follow(out) + + interpolate_output = self.interpolate_conv(input) + interpolate_output = self.interpolate_bn(interpolate_output) + interpolate_output = F.relu(interpolate_output) + + interpolate = F.interpolate(interpolate_output, size=[self.up_height, self.up_width], + mode='bilinear', align_corners=False) + + return out + interpolate + +@HEADS.register_module +class BUSD(nn.Module): + def __init__(self, cfg): + super().__init__() + img_height = cfg.img_height + img_width = cfg.img_width + num_classes = cfg.num_classes + + self.layers = nn.ModuleList() + + self.layers.append(UpsamplerBlock(ninput=128, noutput=64, + up_height=int(img_height)//4, up_width=int(img_width)//4)) + self.layers.append(UpsamplerBlock(ninput=64, noutput=32, + up_height=int(img_height)//2, up_width=int(img_width)//2)) + self.layers.append(UpsamplerBlock(ninput=32, noutput=16, + up_height=int(img_height)//1, up_width=int(img_width)//1)) + + self.output_conv = conv1x1(16, num_classes) + + def forward(self, input): + output = input + + for layer in self.layers: + output = layer(output) + + output = self.output_conv(output) + if not self.training: + output = F.softmax(output, dim=1) + output = {'seg': output} + + return output diff --git a/lanedet/models/heads/exist_head.py b/lanedet/models/heads/exist_head.py new file mode 100644 index 0000000..32d32cb --- /dev/null +++ b/lanedet/models/heads/exist_head.py @@ -0,0 +1,36 @@ +import torch +from torch import nn +import torch.nn.functional as F +from torch.hub import load_state_dict_from_url + +from ..registry import HEADS + +@HEADS.register_module +class ExistHead(nn.Module): + def __init__(self, cfg=None): + super(ExistHead, self).__init__() + self.cfg = cfg + + self.dropout = nn.Dropout2d(0.1) + self.conv8 = nn.Conv2d(cfg.featuremap_out_channel, cfg.num_classes, 1) + + stride = cfg.featuremap_out_stride * 2 + self.fc9 = nn.Linear( + int(cfg.num_classes * cfg.img_width / stride * cfg.img_height / stride), 128) + self.fc10 = nn.Linear(128, cfg.num_classes-1) + + def forward(self, x): + x = self.dropout(x) + x = self.conv8(x) + + x = F.softmax(x, dim=1) + x = F.avg_pool2d(x, 2, stride=2, padding=0) + x = x.view(-1, x.numel() // x.shape[0]) + x = self.fc9(x) + x = F.relu(x) + x = self.fc10(x) + x = torch.sigmoid(x) + + output = {'exist': x} + + return output diff --git a/lanedet/models/heads/lane_cls.py b/lanedet/models/heads/lane_cls.py new file mode 100644 index 0000000..5e948f1 --- /dev/null +++ b/lanedet/models/heads/lane_cls.py @@ -0,0 +1,28 @@ +import torch +from torch import nn +import torch.nn.functional as F +from torch.hub import load_state_dict_from_url +import numpy as np + +from ..registry import HEADS + +@HEADS.register_module +class LaneCls(nn.Module): + def __init__(self, dim, cfg=None): + super(LaneCls, self).__init__() + self.cfg = cfg + chan = cfg.featuremap_out_channel + self.pool = torch.nn.Conv2d(chan, 8, 1) + self.dim = dim + self.total_dim = np.prod(dim) + self.cls = torch.nn.Sequential( + torch.nn.Linear(1800, 2048), + torch.nn.ReLU(), + torch.nn.Linear(2048, self.total_dim), + ) + + def forward(self, x): + x = self.pool(x).view(-1, 1800) + cls = self.cls(x).view(-1, *self.dim) + output = {'cls': cls} + return output diff --git a/lanedet/models/heads/plain_decoder.py b/lanedet/models/heads/plain_decoder.py new file mode 100644 index 0000000..2972800 --- /dev/null +++ b/lanedet/models/heads/plain_decoder.py @@ -0,0 +1,28 @@ +import torch +from torch import nn 
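+# PlainDecoder below recovers full-resolution predictions with a single
+# bilinear interpolation over a 1x1 classifier, in contrast to the BUSD head
+# above, which upsamples stage by stage with learned transposed convolutions.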
+import torch.nn.functional as F
+from torch.hub import load_state_dict_from_url
+
+from ..registry import HEADS
+
+@HEADS.register_module
+class PlainDecoder(nn.Module):
+    def __init__(self, cfg):
+        super(PlainDecoder, self).__init__()
+        self.cfg = cfg
+
+        self.dropout = nn.Dropout2d(0.1)
+        self.conv8 = nn.Conv2d(cfg.featuremap_out_channel, cfg.num_classes, 1)
+
+    def forward(self, x):
+        x = self.dropout(x)
+        x = self.conv8(x)
+        x = F.interpolate(x, size=[self.cfg.img_height, self.cfg.img_width],
+                          mode='bilinear', align_corners=False)
+
+        if not self.training:
+            x = F.softmax(x, dim=1)
+
+        output = {'seg': x}
+
+        return output
diff --git a/lanedet/models/net/__init__.py b/lanedet/models/net/__init__.py
new file mode 100644
index 0000000..49254f2
--- /dev/null
+++ b/lanedet/models/net/__init__.py
@@ -0,0 +1 @@
+from .segmentor import Segmentor
diff --git a/lanedet/models/net/segmentor.py b/lanedet/models/net/segmentor.py
new file mode 100644
index 0000000..5942483
--- /dev/null
+++ b/lanedet/models/net/segmentor.py
@@ -0,0 +1,35 @@
+import torch.nn as nn
+import torch
+
+from lanedet.models.registry import NET
+from ..registry import build_backbone, build_aggregator, build_heads
+
+
+@NET.register_module
+class Segmentor(nn.Module):
+    def __init__(self, cfg):
+        super(Segmentor, self).__init__()
+        self.cfg = cfg
+        self.backbone = build_backbone(cfg)
+        self.aggregator = build_aggregator(cfg) if cfg.aggregator else None
+        # self.decoder = build_decoder(cfg) if cfg.decoder else None
+        self.heads = build_heads(cfg) if cfg.heads else None
+
+    def forward(self, batch):
+        output = {}
+        fea = self.backbone(batch)
+
+        if self.aggregator:
+            fea = self.aggregator(fea)
+
+        # if self.decoder:
+        #     seg = self.decoder(fea)
+        #     output.update(seg)
+
+        if self.heads:
+            # heads built from a list config come back as an iterable nn.Sequential
+            for head in self.heads:
+                output.update(head(fea))
+
+        return output
diff --git a/lanedet/models/registry.py b/lanedet/models/registry.py
new file mode 100644
index 0000000..fef7fcb
--- /dev/null
+++ b/lanedet/models/registry.py
@@ -0,0 +1,33 @@
+from lanedet.utils import Registry, build_from_cfg
+import torch.nn as nn
+
+BACKBONE = Registry('backbone')
+AGGREGATOR = Registry('aggregator')
+HEADS = Registry('heads')
+NET = Registry('net')
+
+def build(cfg, registry, default_args=None):
+    if isinstance(cfg, list):
+        modules = [
+            build_from_cfg(cfg_, registry, default_args) for cfg_ in cfg
+        ]
+        return nn.Sequential(*modules)
+    else:
+        return build_from_cfg(cfg, registry, default_args)
+
+
+def build_backbone(cfg):
+    return build(cfg.backbone, BACKBONE, default_args=dict(cfg=cfg))
+
+def build_aggregator(cfg):
+    return build(cfg.aggregator, AGGREGATOR, default_args=dict(cfg=cfg))
+
+def build_heads(cfg):
+    return build(cfg.heads, HEADS, default_args=dict(cfg=cfg))
+
+def build_net(cfg):
+    return build(cfg.net, NET, default_args=dict(cfg=cfg))
diff --git a/lanedet/runner/__init__.py b/lanedet/runner/__init__.py
new file mode 100644
index 0000000..924425f
--- /dev/null
+++ b/lanedet/runner/__init__.py
@@ -0,0 +1,4 @@
+from .evaluator import *
+from .trainer import *
+
+from .registry import build_evaluator
diff --git a/lanedet/runner/evaluator/__init__.py b/lanedet/runner/evaluator/__init__.py
new file mode 100644
index 0000000..308528c
--- /dev/null
+++ b/lanedet/runner/evaluator/__init__.py
@@ -0,0 +1,2 @@
+from .tusimple.tusimple import Tusimple
+from .culane.culane import CULane
diff --git a/lanedet/runner/evaluator/culane/culane.py b/lanedet/runner/evaluator/culane/culane.py
new file mode 100644
index 0000000..665ef87
--- /dev/null
+++ b/lanedet/runner/evaluator/culane/culane.py
@@ -0,0 +1,150 @@
+import torch.nn as nn
+import torch
+import torch.nn.functional as F
+from lanedet.runner.utils.logger import get_logger
+
+from lanedet.runner.registry import EVALUATOR
+import os
+import subprocess
+from shutil import rmtree
+import cv2
+import numpy as np
+
+def check():
+    import sys
+    result = subprocess.call(
+        './lanedet/runner/evaluator/culane/lane_evaluation/evaluate',
+        stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
+    if result > 1:
+        print('There is something wrong with the evaluate tool, please compile it.')
+        sys.exit()
+
+def read_helper(path):
+    with open(path, 'r') as f:
+        lines = f.readlines()[1:]
+    lines = ' '.join(lines)
+    values = lines.split(' ')[1::2]
+    keys = lines.split(' ')[0::2]
+    keys = [key[:-1] for key in keys]
+    res = {k: v for k, v in zip(keys, values)}
+    return res
+
+def call_culane_eval(data_dir, output_path='./output'):
+    if data_dir[-1] != '/':
+        data_dir = data_dir + '/'
+    detect_dir = os.path.join(output_path, 'lines') + '/'
+
+    w_lane = 30
+    iou = 0.5  # set iou to 0.3 or 0.5
+    im_w = 1640
+    im_h = 590
+    frame = 1
+    scenes = ['0_normal', '1_crowd', '2_hlight', '3_shadow', '4_noline',
+              '5_arrow', '6_curve', '7_cross', '8_night']
+    lists = [os.path.join(data_dir, 'list/test_split/test%s.txt' % s) for s in scenes]
+    if not os.path.exists(os.path.join(output_path, 'txt')):
+        os.mkdir(os.path.join(output_path, 'txt'))
+    outs = [os.path.join(output_path, 'txt', 'out%s.txt' % s) for s in scenes]
+
+    eval_cmd = './lanedet/runner/evaluator/culane/lane_evaluation/evaluate'
+
+    # one call of the evaluation tool per test split; all splits share the same options
+    for list_path, out_path in zip(lists, outs):
+        os.system('%s -a %s -d %s -i %s -l %s -w %s -t %s -c %s -r %s -f %s -o %s' % (
+            eval_cmd, data_dir, detect_dir, data_dir, list_path, w_lane, iou, im_w, im_h, frame, out_path))
+
+    res_all = {}
+    res_all['normal'] = read_helper(outs[0])
+    res_all['crowd'] = read_helper(outs[1])
+    res_all['night'] = read_helper(outs[8])
+    res_all['noline'] = read_helper(outs[4])
+    res_all['shadow'] = read_helper(outs[3])
+    res_all['arrow'] = read_helper(outs[5])
+    res_all['hlight'] = read_helper(outs[2])
+    res_all['curve'] = read_helper(outs[6])
+    res_all['cross'] = read_helper(outs[7])
+    return res_all
+
+@EVALUATOR.register_module
+class CULane(nn.Module):
+    def __init__(self, cfg):
+        super(CULane, self).__init__()
+        # Firstly, check the evaluation tool
+        check()
+        self.cfg = cfg
+        self.logger = get_logger('lanedet')
+        self.out_dir = os.path.join(self.cfg.work_dir, 'lines')
+        if cfg.view:
+            self.view_dir = os.path.join(self.cfg.work_dir, 'vis')
+
+    def evaluate(self, dataset, output, batch):
+        img_name = batch['meta']['img_name']
+        img_path = batch['meta']['full_img_path']
+        # batch, num_lane, lane coords
+        res = dataset.get_lane(output)
+        for idx, (lanes, name) in enumerate(zip(res, img_name)):
+            outname = self.out_dir + name[:-4] + '.lines.txt'
+            outdir = os.path.dirname(outname)
+            if not os.path.exists(outdir):
+                os.makedirs(outdir)
+            with open(outname, 'w') as f:
+                for lane in lanes:
+                    for x, y in lane:
+                        if x < 0 or y < 0:
+                            continue
+                        f.write('%d %d ' % (x, y))
+                    f.write('\n')
+
+            if self.cfg.view:
+                img = cv2.imread(img_path[idx]).astype(np.float32)
+                dataset.view(img, lanes, self.view_dir + img_name[idx])
+
+    def summarize(self):
+        self.logger.info('summarize result...')
+        res = call_culane_eval(self.cfg.dataset_path, output_path=self.cfg.work_dir)
+        TP, FP, FN = 0, 0, 0
+        out_str = 'Copypaste: '
+        for k, v in res.items():
+            val_tp, val_fp, val_fn = int(v['tp']), int(v['fp']), int(v['fn'])
+            TP += val_tp
+            FP += val_fp
+            FN += val_fn
+            self.logger.info(k + ': ' + str(v))
+            out_str += k
+            for metric, value in v.items():
+                out_str += ' ' + str(value).rstrip('\n')
+            out_str += ' '
+        P = TP * 1.0 / (TP + FP + 1e-9)
+        R = TP * 1.0 / (TP + FN + 1e-9)
+        F = 2 * P * R / (P + R + 1e-9)
+        overall_result_str = ('Overall Precision: %f Recall: %f F1: %f' % (P, R, F))
+        self.logger.info(overall_result_str)
+        out_str = out_str + overall_result_str
+        self.logger.info(out_str)
+
+        # delete the tmp output
+        rmtree(self.out_dir)
diff --git a/lanedet/runner/evaluator/culane/lane_evaluation/.gitignore b/lanedet/runner/evaluator/culane/lane_evaluation/.gitignore
new file mode 100644
index 0000000..b501d98
--- /dev/null
+++ b/lanedet/runner/evaluator/culane/lane_evaluation/.gitignore
@@ -0,0 +1,2 @@
+build/
+evaluate
diff --git a/lanedet/runner/evaluator/culane/lane_evaluation/Makefile b/lanedet/runner/evaluator/culane/lane_evaluation/Makefile
new file mode 100755
index 0000000..becffa0
--- /dev/null
+++ b/lanedet/runner/evaluator/culane/lane_evaluation/Makefile
@@ -0,0 +1,50 @@
+PROJECT_NAME:= evaluate
+
+# config ----------------------------------
+OPENCV_VERSION := 2
+
+INCLUDE_DIRS := include
+LIBRARY_DIRS := lib /usr/local/lib
+
+COMMON_FLAGS := -DCPU_ONLY
+CXXFLAGS := -std=c++11 -fopenmp
+LDFLAGS := -fopenmp -Wl,-rpath,./lib
+BUILD_DIR := build
+
+
+# make rules -------------------------------
+CXX ?= g++
+BUILD_DIR ?= ./build
+
+LIBRARIES += opencv_core opencv_highgui opencv_imgproc
+ifeq ($(OPENCV_VERSION), 3)
+	LIBRARIES += opencv_imgcodecs
+endif
+
+CXXFLAGS += $(COMMON_FLAGS) $(foreach includedir,$(INCLUDE_DIRS),-I$(includedir))
+LDFLAGS += $(COMMON_FLAGS) $(foreach includedir,$(LIBRARY_DIRS),-L$(includedir)) $(foreach library,$(LIBRARIES),-l$(library))
+SRC_DIRS += $(shell find * -type d -exec bash -c "find {} -maxdepth 1 \( -name '*.cpp' -o -name '*.proto' \) | grep -q ." \; -print)
+CXX_SRCS += $(shell find src/ -name "*.cpp")
+CXX_TARGETS:=$(patsubst %.cpp, $(BUILD_DIR)/%.o, $(CXX_SRCS))
+ALL_BUILD_DIRS := $(sort $(BUILD_DIR) $(addprefix $(BUILD_DIR)/, $(SRC_DIRS)))
+
+.PHONY: all
+all: $(PROJECT_NAME)
+
+.PHONY: $(ALL_BUILD_DIRS)
+$(ALL_BUILD_DIRS):
+	@mkdir -p $@
+
+$(BUILD_DIR)/%.o: %.cpp | $(ALL_BUILD_DIRS)
+	@echo "CXX" $<
+	@$(CXX) $(CXXFLAGS) -c -o $@ $<
+
+$(PROJECT_NAME): $(CXX_TARGETS)
+	@echo "CXX/LD" $@
+	@$(CXX) -o $@ $^ $(LDFLAGS)
+
+.PHONY: clean
+clean:
+	@rm -rf $(CXX_TARGETS)
+	@rm -rf $(PROJECT_NAME)
+	@rm -rf $(BUILD_DIR)
diff --git a/lanedet/runner/evaluator/culane/lane_evaluation/calTotal.m b/lanedet/runner/evaluator/culane/lane_evaluation/calTotal.m
new file mode 100755
index 0000000..a0eeb08
--- /dev/null
+++ b/lanedet/runner/evaluator/culane/lane_evaluation/calTotal.m
@@ -0,0 +1,23 @@
+%% Calculate the overall F-measure from all scenarios
+clc; clear; close all;
+
+allFile = 'output/vgg_SCNN_DULR_w9_iou0.5.txt';
+
+all = textread(allFile,'%s');
+TP = 0;
+FP = 0;
+FN = 0;
+
+for i=1:9
+    tpline = (i-1)*14+4;
+    tp = str2double(all(tpline));
+    fp = str2double(all(tpline+2));
+    fn = str2double(all(tpline+4));
+    TP = TP + tp;
+    FP = FP + fp;
+    FN = FN + fn;
+end
+
+P = TP/(TP + FP)
+R = TP/(TP + FN)
+F = 2*P*R/(P + R)*100
diff --git a/lanedet/runner/evaluator/culane/lane_evaluation/include/counter.hpp b/lanedet/runner/evaluator/culane/lane_evaluation/include/counter.hpp
new file mode 100644
index 0000000..430e1d4
--- /dev/null
+++ b/lanedet/runner/evaluator/culane/lane_evaluation/include/counter.hpp
@@ -0,0 +1,47 @@
+#ifndef COUNTER_HPP
+#define COUNTER_HPP
+
+#include "lane_compare.hpp"
+#include "hungarianGraph.hpp"
+#include <opencv2/core/core.hpp>
+#include <algorithm>
+#include <iostream>
+#include <vector>
+#include <tuple>
+
+using namespace std;
+using namespace cv;
+
+// Before using functions of this class, lanes should be resized to im_width and
+// im_height with resize_lane() in lane_compare.hpp
+class Counter
+{
+  public:
+    Counter(int _im_width, int _im_height, double _iou_threshold=0.4, int _lane_width=10):tp(0),fp(0),fn(0){
+        im_width = _im_width;
+        im_height = _im_height;
+        sim_threshold = _iou_threshold;
+        lane_compare = new LaneCompare(_im_width, _im_height, _lane_width, LaneCompare::IOU);
+    };
+    double get_precision(void);
+    double get_recall(void);
+    long getTP(void);
+    long getFP(void);
+    long getFN(void);
+    void setTP(long);
+    void setFP(long);
+    void setFN(long);
+    // direct add tp, fp, tn and fn
+    // first match with hungarian
+    tuple<vector<int>, long, long, long, long> count_im_pair(const vector<vector<Point2f> > &anno_lanes, const vector<vector<Point2f> > &detect_lanes);
+    void makeMatch(const vector<vector<double> > &similarity, vector<int> &match1, vector<int> &match2);
+
+  private:
+    double sim_threshold;
+    int im_width;
+    int im_height;
+    long tp;
+    long fp;
+    long fn;
+    LaneCompare *lane_compare;
+};
+#endif
diff --git a/lanedet/runner/evaluator/culane/lane_evaluation/include/hungarianGraph.hpp b/lanedet/runner/evaluator/culane/lane_evaluation/include/hungarianGraph.hpp
new file mode 100644
index 0000000..40c3ead
--- /dev/null
+++ b/lanedet/runner/evaluator/culane/lane_evaluation/include/hungarianGraph.hpp
@@ -0,0 +1,71 @@
+#ifndef HUNGARIAN_GRAPH_HPP
+#define HUNGARIAN_GRAPH_HPP
+#include <vector>
+#include <cmath>
+#include <algorithm>
+using namespace std;
+
+struct bipartiteGraph {
+    vector<vector<double> > mat;
+    vector<bool> leftUsed, rightUsed;
+    vector<double> leftWeight, rightWeight;
+    vector<int> rightMatch, leftMatch;
+    int leftNum, rightNum;
+    bool matchDfs(int u) {
+        leftUsed[u] = true;
+        for (int v = 0; v < rightNum; v++) {
+            if (!rightUsed[v] && fabs(leftWeight[u] + rightWeight[v] - mat[u][v]) < 1e-2) {
+                rightUsed[v] = true;
+                if (rightMatch[v] == -1 || matchDfs(rightMatch[v])) {
+                    rightMatch[v] = u;
+                    leftMatch[u] = v;
+                    return true;
+                }
+            }
+        }
+        return false;
+    }
+    void resize(int leftNum, int rightNum) {
+        this->leftNum = leftNum;
+        this->rightNum = rightNum;
+        leftMatch.resize(leftNum);
+        rightMatch.resize(rightNum);
+        leftUsed.resize(leftNum);
+        rightUsed.resize(rightNum);
+        leftWeight.resize(leftNum);
+        rightWeight.resize(rightNum);
+        mat.resize(leftNum);
+        for (int i = 0; i < leftNum; i++) mat[i].resize(rightNum);
+    }
+    void match() {
+        for (int i = 0; i < leftNum; i++) leftMatch[i] = -1;
+        for (int i = 0; i < rightNum; i++) rightMatch[i] = -1;
+        for (int i = 0; i < rightNum; i++) rightWeight[i] = 0;
+        for (int i = 0; i < leftNum; i++) {
+            leftWeight[i] = -1e5;
+            for (int j = 0; j < rightNum; j++) {
+                if (leftWeight[i] < mat[i][j]) leftWeight[i] = mat[i][j];
+            }
+        }
+
+        for (int u = 0; u < leftNum; u++) {
+            while (1) {
+                for (int i = 0; i < leftNum; i++) leftUsed[i] = false;
+                for (int i = 0; i < rightNum; i++) rightUsed[i] = false;
+                if (matchDfs(u)) break;
+                double d = 1e10;
+                for (int i = 0; i < leftNum; i++) {
+                    if (leftUsed[i] ) {
+                        for (int j = 0; j < rightNum; j++) {
+                            if (!rightUsed[j]) d = min(d, leftWeight[i] + rightWeight[j] - mat[i][j]);
+                        }
+                    }
+                }
+                if (d == 1e10) return ;
+                for (int i = 0; i < leftNum; i++) if (leftUsed[i]) leftWeight[i] -= d;
+                for (int i = 0; i < rightNum; i++) if (rightUsed[i]) rightWeight[i] += d;
+            }
+        }
+    }
+};
+
+
+#endif // HUNGARIAN_GRAPH_HPP
diff --git a/lanedet/runner/evaluator/culane/lane_evaluation/include/lane_compare.hpp b/lanedet/runner/evaluator/culane/lane_evaluation/include/lane_compare.hpp
new file mode 100644
index 0000000..02ddfce
--- /dev/null
+++ b/lanedet/runner/evaluator/culane/lane_evaluation/include/lane_compare.hpp
@@ -0,0 +1,51 @@
+#ifndef LANE_COMPARE_HPP
+#define LANE_COMPARE_HPP
+
+#include "spline.hpp"
+#include <vector>
+#include <iostream>
+#include <opencv2/core/core.hpp>
+#include <opencv2/imgproc/imgproc.hpp>
+
+#if CV_VERSION_EPOCH == 2
+#define OPENCV2
+#elif CV_VERSION_MAJOR == 3
+#define OPENCV3
+#else
+#error Not support this OpenCV version
+#endif
+
+#ifdef OPENCV3
+#include <opencv2/imgcodecs/imgcodecs.hpp>
+#elif defined(OPENCV2)
+#include <opencv2/highgui/highgui.hpp>
+#endif
+
+using namespace std;
+using namespace cv;
+
+class LaneCompare{
+  public:
+    enum CompareMode{
+        IOU,
+        Caltech
+    };
+
+    LaneCompare(int _im_width, int _im_height, int _lane_width = 10, CompareMode _compare_mode = IOU){
+        im_width = _im_width;
+        im_height = _im_height;
+        compare_mode = _compare_mode;
+        lane_width = _lane_width;
+    }
+
+    double get_lane_similarity(const vector<Point2f> &lane1, const vector<Point2f> &lane2);
+    void resize_lane(vector<Point2f> &curr_lane, int curr_width, int curr_height);
+
+  private:
+    CompareMode compare_mode;
+    int im_width;
+    int im_height;
+    int lane_width;
+    Spline splineSolver;
+};
+
+#endif
diff --git a/lanedet/runner/evaluator/culane/lane_evaluation/include/spline.hpp b/lanedet/runner/evaluator/culane/lane_evaluation/include/spline.hpp
new file mode 100644
index 0000000..0ae73ef
--- /dev/null
+++ b/lanedet/runner/evaluator/culane/lane_evaluation/include/spline.hpp
@@ -0,0 +1,28 @@
+#ifndef SPLINE_HPP
+#define SPLINE_HPP
+#include <vector>
+#include <iostream>
+#include <cmath>
+#include <opencv2/core/core.hpp>
+
+using namespace cv;
+using namespace std;
+
+struct Func {
+    double a_x;
+    double b_x;
+    double c_x;
+    double d_x;
+    double a_y;
+    double b_y;
+    double c_y;
+    double d_y;
+    double h;
+};
+class Spline {
+public:
+    vector<Point2f> splineInterpTimes(const vector<Point2f> &tmp_line, int times);
+    vector<Point2f> splineInterpStep(vector<Point2f> tmp_line, double step);
+    vector<Func> cal_fun(const vector<Point2f> &point_v);
+};
+#endif
diff --git a/lanedet/runner/evaluator/culane/lane_evaluation/src/counter.cpp b/lanedet/runner/evaluator/culane/lane_evaluation/src/counter.cpp
new file mode 100644
index 0000000..f4fa6a7
--- /dev/null
+++ b/lanedet/runner/evaluator/culane/lane_evaluation/src/counter.cpp
@@ -0,0 +1,134 @@
+/*************************************************************************
+	> File Name: counter.cpp
+	> Author: Xingang Pan, Jun Li
+	> Mail: px117@ie.cuhk.edu.hk
+	> Created Time: Thu Jul 14 20:23:08 2016
+ ************************************************************************/
+
+#include "counter.hpp"
+
+double Counter::get_precision(void)
+{
+    cerr<<"tp: "<<tp<<" fp: "<<fp<<" fn: "<<fn<<endl;
+    if(tp+fp == 0)
+    {
+        cerr<<"no positive detection"<<endl;
+        return -1;
+    }
+    return tp/double(tp + fp);
+}
+
+double Counter::get_recall(void)
+{
+    if(tp+fn == 0)
+    {
+        cerr<<"no ground truth positive"<<endl;
+        return -1;
+    }
+    return tp/double(tp + fn);
+}
+
+long Counter::getTP(void)
+{
+    return tp;
+}
+
+long Counter::getFP(void)
+{
+    return fp;
+}
+
+long Counter::getFN(void)
+{
+    return fn;
+}
+
+void Counter::setTP(long value)
+{
+    tp = value;
+}
+
+void Counter::setFP(long value)
+{
+    fp = value;
+}
+
+void Counter::setFN(long value)
+{
+    fn = value;
+}
+
+tuple<vector<int>, long, long, long, long> Counter::count_im_pair(const vector<vector<Point2f> > &anno_lanes, const vector<vector<Point2f> > &detect_lanes)
+{
+    vector<int> anno_match(anno_lanes.size(), -1);
+    vector<int> detect_match;
+    if(anno_lanes.empty())
+    {
+        return make_tuple(anno_match, 0, detect_lanes.size(), 0, 0);
+    }
+
+    if(detect_lanes.empty())
+    {
+        return make_tuple(anno_match, 0, 0, 0, anno_lanes.size());
+    }
+    // hungarian match first
+
+    // first calc similarity matrix
+    vector<vector<double> > similarity(anno_lanes.size(), vector<double>(detect_lanes.size(), 0));
+    for(int i=0; i<anno_lanes.size(); i++)
+    {
+        const vector<Point2f> &curr_anno_lane = anno_lanes[i];
+        for(int j=0; j<detect_lanes.size(); j++)
+        {
+            const vector<Point2f> &curr_detect_lane = detect_lanes[j];
+            similarity[i][j] = lane_compare->get_lane_similarity(curr_anno_lane, curr_detect_lane);
+        }
+    }
+
+    makeMatch(similarity, anno_match, detect_match);
+
+    int curr_tp = 0;
+    // count and add
+    for(int i=0; i<anno_lanes.size(); i++)
+    {
+        if(anno_match[i]>=0 && similarity[i][anno_match[i]] > sim_threshold)
+        {
+            curr_tp++;
+        }
+        else
+        {
+            anno_match[i] = -1;
+        }
+    }
+    int curr_fn = anno_lanes.size() - curr_tp;
+    int curr_fp = detect_lanes.size() - curr_tp;
+    return make_tuple(anno_match, curr_tp, curr_fp, 0, curr_fn);
+}
+
+
+void Counter::makeMatch(const vector<vector<double> > &similarity, vector<int> &match1, vector<int> &match2) {
+    int m = similarity.size();
+    int n = similarity[0].size();
+    bipartiteGraph gra;
+    bool have_exchange = false;
+    if (m > n) {
+        have_exchange = true;
+        swap(m, n);
+    }
+    gra.resize(m, n);
+    for (int i = 0; i < gra.leftNum; i++) {
+        for (int j = 0; j < gra.rightNum; j++) {
+            if(have_exchange)
+                gra.mat[i][j] = similarity[j][i];
+            else
+                gra.mat[i][j] = similarity[i][j];
+        }
+    }
+    gra.match();
+    match1 = gra.leftMatch;
+    match2 = gra.rightMatch;
+    if (have_exchange) swap(match1, match2);
+}
diff --git a/lanedet/runner/evaluator/culane/lane_evaluation/src/evaluate.cpp b/lanedet/runner/evaluator/culane/lane_evaluation/src/evaluate.cpp
new file mode 100644
index 0000000..ae95bb4
--- /dev/null
+++ b/lanedet/runner/evaluator/culane/lane_evaluation/src/evaluate.cpp
@@ -0,0 +1,302 @@
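+// Standalone CLI that matches detected lanes against annotations per image
+// with bipartite (Hungarian) matching on an IoU-style similarity (see
+// counter.hpp), accumulates TP/FP/FN over the image list, and reports
+// precision, recall and F-measure.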
+/*************************************************************************
+	> File Name: evaluate.cpp
+	> Author: Xingang Pan, Jun Li
+	> Mail: px117@ie.cuhk.edu.hk
+	> Created Time: 2016年07月14日 星期四 18时28分45秒
+ ************************************************************************/
+
+#include "counter.hpp"
+#include "spline.hpp"
+#include <unistd.h>
+#include <iostream>
+#include <fstream>
+#include <sstream>
+#include <string>
+#include <vector>
+#include <opencv2/core/core.hpp>
+#include <opencv2/highgui/highgui.hpp>
+using namespace std;
+using namespace cv;
+
+void help(void) {
+    cout << "./evaluate [OPTIONS]" << endl;
+    cout << "-h : print usage help" << endl;
+    cout << "-a : directory for annotation files (default: "
+            "/data/driving/eval_data/anno_label/)" << endl;
+    cout << "-d : directory for detection files (default: "
+            "/data/driving/eval_data/predict_label/)" << endl;
+    cout << "-i : directory for image files (default: "
+            "/data/driving/eval_data/img/)" << endl;
+    cout << "-l : list of images used for evaluation (default: "
+            "/data/driving/eval_data/img/all.txt)" << endl;
+    cout << "-w : width of the lanes (default: 10)" << endl;
+    cout << "-t : threshold of iou (default: 0.4)" << endl;
+    cout << "-c : cols (max image width) (default: 1920)"
+         << endl;
+    cout << "-r : rows (max image height) (default: 1080)"
+         << endl;
+    cout << "-s : show visualization" << endl;
+    cout << "-f : start frame in the test set (default: 1)"
+         << endl;
+}
+
+void read_lane_file(const string &file_name, vector<vector<Point2f> > &lanes);
+void visualize(string &full_im_name, vector<vector<Point2f> > &anno_lanes,
+               vector<vector<Point2f> > &detect_lanes, vector<int> anno_match,
+               int width_lane, string save_path = "");
+
+int main(int argc, char **argv) {
+    // process params
+    string anno_dir = "/data/driving/eval_data/anno_label/";
+    string detect_dir = "/data/driving/eval_data/predict_label/";
+    string im_dir = "/data/driving/eval_data/img/";
+    string list_im_file = "/data/driving/eval_data/img/all.txt";
+    string output_file = "./output.txt";
+    int width_lane = 10;
+    double iou_threshold = 0.4;
+    int im_width = 1920;
+    int im_height = 1080;
+    int oc;
+    bool show = false;
+    int frame = 1;
+    string save_path = "";
+    while ((oc = getopt(argc, argv, "ha:d:i:l:w:t:c:r:sf:o:p:")) != -1) {
+        switch (oc) {
+        case 'h':
+            help();
+            return 0;
+        case 'a':
+            anno_dir = optarg;
+            break;
+        case 'd':
+            detect_dir = optarg;
+            break;
+        case 'i':
+            im_dir = optarg;
+            break;
+        case 'l':
+            list_im_file = optarg;
+            break;
+        case 'w':
+            width_lane = atoi(optarg);
+            break;
+        case 't':
+            iou_threshold = atof(optarg);
+            break;
+        case 'c':
+            im_width = atoi(optarg);
+            break;
+        case 'r':
+            im_height = atoi(optarg);
+            break;
+        case 's':
+            show = true;
+            break;
+        case 'p':
+            save_path = optarg;
+            break;
+        case 'f':
+            frame = atoi(optarg);
+            break;
+        case 'o':
+            output_file = optarg;
+            break;
+        }
+    }
+
+    cout << "------------Configuration---------" << endl;
+    cout << "anno_dir: " << anno_dir << endl;
+    cout << "detect_dir: " << detect_dir << endl;
+    cout << "im_dir: " << im_dir << endl;
+    cout << "list_im_file: " << list_im_file << endl;
+    cout << "width_lane: " << width_lane << endl;
+    cout << "iou_threshold: " << iou_threshold << endl;
+    cout << "im_width: " << im_width << endl;
+    cout << "im_height: " << im_height << endl;
+    cout << "-----------------------------------" << endl;
+    cout << "Evaluating the results..." << endl;
+    // this is the max_width and max_height
+
+    if (width_lane < 1) {
+        cerr << "width_lane must be positive" << endl;
+        help();
+        return 1;
+    }
+
+    ifstream ifs_im_list(list_im_file, ios::in);
+    if (ifs_im_list.fail()) {
+        cerr << "Error: file " << list_im_file << " does not exist!" << endl;
+        return 1;
+    }
+
+    Counter counter(im_width, im_height, iou_threshold, width_lane);
+
+    vector<int> anno_match;
+    string sub_im_name;
+    // pre-load filelist
+    vector<string> filelists;
+    while (getline(ifs_im_list, sub_im_name)) {
+        filelists.push_back(sub_im_name);
+    }
+    ifs_im_list.close();
+
+    vector<tuple<vector<int>, long, long, long, long> > tuple_lists;
+    tuple_lists.resize(filelists.size());
+
+#pragma omp parallel for
+    for (size_t i = 0; i < filelists.size(); i++) {
+        auto sub_im_name = filelists[i];
+        string full_im_name = im_dir + sub_im_name;
+        string sub_txt_name =
+            sub_im_name.substr(0, sub_im_name.find_last_of(".")) + ".lines.txt";
+        string anno_file_name = anno_dir + sub_txt_name;
+        string detect_file_name = detect_dir + sub_txt_name;
+        vector<vector<Point2f> > anno_lanes;
+        vector<vector<Point2f> > detect_lanes;
+        read_lane_file(anno_file_name, anno_lanes);
+        read_lane_file(detect_file_name, detect_lanes);
+        tuple_lists[i] = counter.count_im_pair(anno_lanes, detect_lanes);
+        if (show) {
+            auto anno_match = get<0>(tuple_lists[i]);
+            visualize(full_im_name, anno_lanes, detect_lanes, anno_match, width_lane);
+            waitKey(0);
+        }
+        if (save_path != "") {
+            auto anno_match = get<0>(tuple_lists[i]);
+            visualize(full_im_name, anno_lanes, detect_lanes, anno_match, width_lane,
+                      save_path);
+        }
+    }
+
+    long tp = 0, fp = 0, tn = 0, fn = 0;
+    for (auto result : tuple_lists) {
+        tp += get<1>(result);
+        fp += get<2>(result);
+        // tn = get<3>(result);
+        fn += get<4>(result);
+    }
+    counter.setTP(tp);
+    counter.setFP(fp);
+    counter.setFN(fn);
+
+    double precision = counter.get_precision();
+    double recall = counter.get_recall();
+    double F = 2 * precision * recall / (precision + recall);
+    cerr << "finished process file" << endl;
+    cout << "precision: " << precision << endl;
+    cout << "recall: " << recall << endl;
+    cout << "Fmeasure: " << F << endl;
+    cout << "----------------------------------" << endl;
+
+    ofstream ofs_out_file;
+    ofs_out_file.open(output_file, ios::out);
+    ofs_out_file << "file: " << output_file << endl;
+    ofs_out_file << "tp: " << counter.getTP() << " fp: " << counter.getFP()
+                 << " fn: " << counter.getFN() << endl;
+    ofs_out_file << "precision: " << precision << endl;
+    ofs_out_file << "recall: " << recall << endl;
+    ofs_out_file << "Fmeasure: " << F << endl << endl;
+    ofs_out_file.close();
+    return 0;
+}
+
+void read_lane_file(const string &file_name, vector<vector<Point2f> > &lanes) {
+    lanes.clear();
+    ifstream ifs_lane(file_name, ios::in);
+    if (ifs_lane.fail()) {
+        return;
+    }
+
+    string str_line;
+    while (getline(ifs_lane, str_line)) {
+        vector<Point2f> curr_lane;
+        stringstream ss;
+        ss << str_line;
+        double x, y;
+        while (ss >> x >> y) {
+            curr_lane.push_back(Point2f(x, y));
+        }
+        lanes.push_back(curr_lane);
+    }
+
+    ifs_lane.close();
+}
+
+void visualize(string &full_im_name, vector<vector<Point2f> > &anno_lanes,
+               vector<vector<Point2f> > &detect_lanes, vector<int> anno_match,
+               int width_lane, string save_path) {
+    Mat img = imread(full_im_name, 1);
+    Mat img2 = imread(full_im_name, 1);
+    vector<Point2f> curr_lane;
+    vector<Point2f> p_interp;
+    Spline splineSolver;
+    Scalar color_B = Scalar(255, 0, 0);
+    Scalar color_G = Scalar(0, 255, 0);
+    Scalar color_R = Scalar(0, 0, 255);
+    Scalar color_P = Scalar(255, 0, 255);
+    Scalar color;
+    for (int i = 0; i < anno_lanes.size(); i++) {
+        curr_lane = anno_lanes[i];
+        if (curr_lane.size() == 2) {
+            p_interp = curr_lane;
+        } else {
splineSolver.splineInterpTimes(curr_lane, 50); + } + if (anno_match[i] >= 0) { + color = color_G; + } else { + color = color_G; + } + for (int n = 0; n < p_interp.size() - 1; n++) { + line(img, p_interp[n], p_interp[n + 1], color, width_lane); + line(img2, p_interp[n], p_interp[n + 1], color, 2); + } + } + bool detected; + for (int i = 0; i < detect_lanes.size(); i++) { + detected = false; + curr_lane = detect_lanes[i]; + if (curr_lane.size() == 2) { + p_interp = curr_lane; + } else { + p_interp = splineSolver.splineInterpTimes(curr_lane, 50); + } + for (int n = 0; n < anno_lanes.size(); n++) { + if (anno_match[n] == i) { + detected = true; + break; + } + } + if (detected == true) { + color = color_B; + } else { + color = color_R; + } + for (int n = 0; n < p_interp.size() - 1; n++) { + line(img, p_interp[n], p_interp[n + 1], color, width_lane); + line(img2, p_interp[n], p_interp[n + 1], color, 2); + } + } + if (save_path != "") { + size_t pos = 0; + string s = full_im_name; + std::string token; + std::string delimiter = "/"; + vector names; + while ((pos = s.find(delimiter)) != std::string::npos) { + token = s.substr(0, pos); + names.emplace_back(token); + s.erase(0, pos + delimiter.length()); + } + names.emplace_back(s); + string file_name = names[3] + '_' + names[4] + '_' + names[5]; + // cout << file_name << endl; + imwrite(save_path + '/' + file_name, img); + } else { + namedWindow("visualize", 1); + imshow("visualize", img); + namedWindow("visualize2", 1); + imshow("visualize2", img2); + } +} diff --git a/lanedet/runner/evaluator/culane/lane_evaluation/src/lane_compare.cpp b/lanedet/runner/evaluator/culane/lane_evaluation/src/lane_compare.cpp new file mode 100644 index 0000000..83d08b9 --- /dev/null +++ b/lanedet/runner/evaluator/culane/lane_evaluation/src/lane_compare.cpp @@ -0,0 +1,73 @@ +/************************************************************************* + > File Name: lane_compare.cpp + > Author: Xingang Pan, Jun Li + > Mail: px117@ie.cuhk.edu.hk + > Created Time: Fri Jul 15 10:26:32 2016 + ************************************************************************/ + +#include "lane_compare.hpp" + +double LaneCompare::get_lane_similarity(const vector &lane1, const vector &lane2) +{ + if(lane1.size()<2 || lane2.size()<2) + { + cerr<<"lane size must be greater or equal to 2"< p_interp1; + vector p_interp2; + if(lane1.size() == 2) + { + p_interp1 = lane1; + } + else + { + p_interp1 = splineSolver.splineInterpTimes(lane1, 50); + } + + if(lane2.size() == 2) + { + p_interp2 = lane2; + } + else + { + p_interp2 = splineSolver.splineInterpTimes(lane2, 50); + } + + Scalar color_white = Scalar(1); + for(int n=0; n &curr_lane, int curr_width, int curr_height) +{ + if(curr_width == im_width && curr_height == im_height) + { + return; + } + double x_scale = im_width/(double)curr_width; + double y_scale = im_height/(double)curr_height; + for(int n=0; n +#include +#include "spline.hpp" +using namespace std; +using namespace cv; + +vector Spline::splineInterpTimes(const vector& tmp_line, int times) { + vector res; + + if(tmp_line.size() == 2) { + double x1 = tmp_line[0].x; + double y1 = tmp_line[0].y; + double x2 = tmp_line[1].x; + double y2 = tmp_line[1].y; + + for (int k = 0; k <= times; k++) { + double xi = x1 + double((x2 - x1) * k) / times; + double yi = y1 + double((y2 - y1) * k) / times; + res.push_back(Point2f(xi, yi)); + } + } + + else if(tmp_line.size() > 2) + { + vector tmp_func; + tmp_func = this->cal_fun(tmp_line); + if (tmp_func.empty()) { + cout << "in splineInterpTimes: cal_fun 
failed" << endl; + return res; + } + for(int j = 0; j < tmp_func.size(); j++) + { + double delta = tmp_func[j].h / times; + for(int k = 0; k < times; k++) + { + double t1 = delta*k; + double x1 = tmp_func[j].a_x + tmp_func[j].b_x*t1 + tmp_func[j].c_x*pow(t1,2) + tmp_func[j].d_x*pow(t1,3); + double y1 = tmp_func[j].a_y + tmp_func[j].b_y*t1 + tmp_func[j].c_y*pow(t1,2) + tmp_func[j].d_y*pow(t1,3); + res.push_back(Point2f(x1, y1)); + } + } + res.push_back(tmp_line[tmp_line.size() - 1]); + } + else { + cerr << "in splineInterpTimes: not enough points" << endl; + } + return res; +} +vector Spline::splineInterpStep(vector tmp_line, double step) { + vector res; + /* + if (tmp_line.size() == 2) { + double x1 = tmp_line[0].x; + double y1 = tmp_line[0].y; + double x2 = tmp_line[1].x; + double y2 = tmp_line[1].y; + + for (double yi = std::min(y1, y2); yi < std::max(y1, y2); yi += step) { + double xi; + if (yi == y1) xi = x1; + else xi = (x2 - x1) / (y2 - y1) * (yi - y1) + x1; + res.push_back(Point2f(xi, yi)); + } + }*/ + if (tmp_line.size() == 2) { + double x1 = tmp_line[0].x; + double y1 = tmp_line[0].y; + double x2 = tmp_line[1].x; + double y2 = tmp_line[1].y; + tmp_line[1].x = (x1 + x2) / 2; + tmp_line[1].y = (y1 + y2) / 2; + tmp_line.push_back(Point2f(x2, y2)); + } + if (tmp_line.size() > 2) { + vector tmp_func; + tmp_func = this->cal_fun(tmp_line); + double ystart = tmp_line[0].y; + double yend = tmp_line[tmp_line.size() - 1].y; + bool down; + if (ystart < yend) down = 1; + else down = 0; + if (tmp_func.empty()) { + cerr << "in splineInterpStep: cal_fun failed" << endl; + } + + for(int j = 0; j < tmp_func.size(); j++) + { + for(double t1 = 0; t1 < tmp_func[j].h; t1 += step) + { + double x1 = tmp_func[j].a_x + tmp_func[j].b_x*t1 + tmp_func[j].c_x*pow(t1,2) + tmp_func[j].d_x*pow(t1,3); + double y1 = tmp_func[j].a_y + tmp_func[j].b_y*t1 + tmp_func[j].c_y*pow(t1,2) + tmp_func[j].d_y*pow(t1,3); + res.push_back(Point2f(x1, y1)); + } + } + res.push_back(tmp_line[tmp_line.size() - 1]); + } + else { + cerr << "in splineInterpStep: not enough points" << endl; + } + return res; +} + +vector Spline::cal_fun(const vector &point_v) +{ + vector func_v; + int n = point_v.size(); + if(n<=2) { + cout << "in cal_fun: point number less than 3" << endl; + return func_v; + } + + func_v.resize(point_v.size()-1); + + vector Mx(n); + vector My(n); + vector A(n-2); + vector B(n-2); + vector C(n-2); + vector Dx(n-2); + vector Dy(n-2); + vector h(n-1); + //vector func_v(n-1); + + for(int i = 0; i < n-1; i++) + { + h[i] = sqrt(pow(point_v[i+1].x - point_v[i].x, 2) + pow(point_v[i+1].y - point_v[i].y, 2)); + } + + for(int i = 0; i < n-2; i++) + { + A[i] = h[i]; + B[i] = 2*(h[i]+h[i+1]); + C[i] = h[i+1]; + + Dx[i] = 6*( (point_v[i+2].x - point_v[i+1].x)/h[i+1] - (point_v[i+1].x - point_v[i].x)/h[i] ); + Dy[i] = 6*( (point_v[i+2].y - point_v[i+1].y)/h[i+1] - (point_v[i+1].y - point_v[i].y)/h[i] ); + } + + //TDMA + C[0] = C[0] / B[0]; + Dx[0] = Dx[0] / B[0]; + Dy[0] = Dy[0] / B[0]; + for(int i = 1; i < n-2; i++) + { + double tmp = B[i] - A[i]*C[i-1]; + C[i] = C[i] / tmp; + Dx[i] = (Dx[i] - A[i]*Dx[i-1]) / tmp; + Dy[i] = (Dy[i] - A[i]*Dy[i-1]) / tmp; + } + Mx[n-2] = Dx[n-3]; + My[n-2] = Dy[n-3]; + for(int i = n-4; i >= 0; i--) + { + Mx[i+1] = Dx[i] - C[i]*Mx[i+2]; + My[i+1] = Dy[i] - C[i]*My[i+2]; + } + + Mx[0] = 0; + Mx[n-1] = 0; + My[0] = 0; + My[n-1] = 0; + + for(int i = 0; i < n-1; i++) + { + func_v[i].a_x = point_v[i].x; + func_v[i].b_x = (point_v[i+1].x - point_v[i].x)/h[i] - (2*h[i]*Mx[i] + h[i]*Mx[i+1]) / 6; + 
func_v[i].c_x = Mx[i]/2; + func_v[i].d_x = (Mx[i+1] - Mx[i]) / (6*h[i]); + + func_v[i].a_y = point_v[i].y; + func_v[i].b_y = (point_v[i+1].y - point_v[i].y)/h[i] - (2*h[i]*My[i] + h[i]*My[i+1]) / 6; + func_v[i].c_y = My[i]/2; + func_v[i].d_y = (My[i+1] - My[i]) / (6*h[i]); + + func_v[i].h = h[i]; + } + return func_v; +} diff --git a/lanedet/runner/evaluator/tusimple/lane.py b/lanedet/runner/evaluator/tusimple/lane.py new file mode 100644 index 0000000..44abf70 --- /dev/null +++ b/lanedet/runner/evaluator/tusimple/lane.py @@ -0,0 +1,108 @@ +import numpy as np +from sklearn.linear_model import LinearRegression +import json as json + + +class LaneEval(object): + lr = LinearRegression() + pixel_thresh = 20 + pt_thresh = 0.85 + + @staticmethod + def get_angle(xs, y_samples): + xs, ys = xs[xs >= 0], y_samples[xs >= 0] + if len(xs) > 1: + LaneEval.lr.fit(ys[:, None], xs) + k = LaneEval.lr.coef_[0] + theta = np.arctan(k) + else: + theta = 0 + return theta + + @staticmethod + def line_accuracy(pred, gt, thresh): + pred = np.array([p if p >= 0 else -100 for p in pred]) + gt = np.array([g if g >= 0 else -100 for g in gt]) + return np.sum(np.where(np.abs(pred - gt) < thresh, 1., 0.)) / len(gt) + + @staticmethod + def bench(pred, gt, y_samples, running_time): + if any(len(p) != len(y_samples) for p in pred): + raise Exception('Format of lanes error.') + if running_time > 200 or len(gt) + 2 < len(pred): + return 0., 0., 1. + angles = [LaneEval.get_angle( + np.array(x_gts), np.array(y_samples)) for x_gts in gt] + threshs = [LaneEval.pixel_thresh / np.cos(angle) for angle in angles] + line_accs = [] + fp, fn = 0., 0. + matched = 0. + for x_gts, thresh in zip(gt, threshs): + accs = [LaneEval.line_accuracy( + np.array(x_preds), np.array(x_gts), thresh) for x_preds in pred] + max_acc = np.max(accs) if len(accs) > 0 else 0. + if max_acc < LaneEval.pt_thresh: + fn += 1 + else: + matched += 1 + line_accs.append(max_acc) + fp = len(pred) - matched + if len(gt) > 4 and fn > 0: + fn -= 1 + s = sum(line_accs) + if len(gt) > 4: + s -= min(line_accs) + return s / max(min(4.0, len(gt)), 1.), fp / len(pred) if len(pred) > 0 else 0., fn / max(min(len(gt), 4.), 1.) + + @staticmethod + def bench_one_submit(pred_file, gt_file): + try: + json_pred = [json.loads(line) + for line in open(pred_file).readlines()] + except BaseException as e: + raise Exception('Fail to load json file of the prediction.') + json_gt = [json.loads(line) for line in open(gt_file).readlines()] + if len(json_gt) != len(json_pred): + raise Exception( + 'We do not get the predictions of all the test tasks') + gts = {l['raw_file']: l for l in json_gt} + accuracy, fp, fn = 0., 0., 0. 
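+        # Aggregate per-image scores from LaneEval.bench: each GT lane is
+        # matched to its best prediction by pointwise accuracy under an
+        # angle-adjusted pixel threshold; FP counts unmatched predictions,
+        # FN counts missed GT lanes.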
+        for pred in json_pred:
+            if 'raw_file' not in pred or 'lanes' not in pred or 'run_time' not in pred:
+                raise Exception(
+                    'raw_file or lanes or run_time not in some predictions.')
+            raw_file = pred['raw_file']
+            pred_lanes = pred['lanes']
+            run_time = pred['run_time']
+            if raw_file not in gts:
+                raise Exception(
+                    'Some raw_file from your predictions do not exist in the test tasks.')
+            gt = gts[raw_file]
+            gt_lanes = gt['lanes']
+            y_samples = gt['h_samples']
+            try:
+                a, p, n = LaneEval.bench(
+                    pred_lanes, gt_lanes, y_samples, run_time)
+            except BaseException as e:
+                raise Exception('Format of lanes error.')
+            accuracy += a
+            fp += p
+            fn += n
+        num = len(gts)
+        # the first return parameter is the default ranking parameter
+        return json.dumps([
+            {'name': 'Accuracy', 'value': accuracy / num, 'order': 'desc'},
+            {'name': 'FP', 'value': fp / num, 'order': 'asc'},
+            {'name': 'FN', 'value': fn / num, 'order': 'asc'}
+        ]), accuracy / num
+
+
+if __name__ == '__main__':
+    import sys
+    try:
+        if len(sys.argv) != 3:
+            raise Exception('Invalid input arguments')
+        print(LaneEval.bench_one_submit(sys.argv[1], sys.argv[2]))
+    except Exception as e:
+        print(e)
+        sys.exit(str(e))
diff --git a/lanedet/runner/evaluator/tusimple/tusimple.py b/lanedet/runner/evaluator/tusimple/tusimple.py
new file mode 100644
index 0000000..647d5d6
--- /dev/null
+++ b/lanedet/runner/evaluator/tusimple/tusimple.py
@@ -0,0 +1,104 @@
+import torch.nn as nn
+import torch
+import torch.nn.functional as F
+from lanedet.runner.utils.logger import get_logger
+
+from lanedet.runner.registry import EVALUATOR
+import json
+import os
+import cv2
+
+from .lane import LaneEval
+
+def split_path(path):
+    """split path tree into list"""
+    folders = []
+    while True:
+        path, folder = os.path.split(path)
+        if folder != "":
+            folders.insert(0, folder)
+        else:
+            if path != "":
+                folders.insert(0, path)
+            break
+    return folders
+
+
+@EVALUATOR.register_module
+class Tusimple(nn.Module):
+    def __init__(self, cfg):
+        super(Tusimple, self).__init__()
+        self.cfg = cfg
+        exp_dir = os.path.join(self.cfg.work_dir, "output")
+        if not os.path.exists(exp_dir):
+            os.mkdir(exp_dir)
+        self.out_path = os.path.join(exp_dir, "coord_output")
+        if not os.path.exists(self.out_path):
+            os.mkdir(self.out_path)
+        self.dump_to_json = []
+        self.logger = get_logger('lanedet')
+        if cfg.view:
+            self.view_dir = os.path.join(self.cfg.work_dir, 'vis')
+
+    def evaluate_lane(self, dataset, res, batch):
+        img_name = batch['meta']['img_name']
+        img_path = batch['meta']['full_img_path']
+        for b in range(len(res)):
+            lane_coords = res[b]
+            for i in range(len(lane_coords)):
+                lane_coords[i] = sorted(
+                    lane_coords[i], key=lambda pair: pair[1])
+
+            path_tree = split_path(img_name[b])
+            save_dir, save_name = path_tree[-3:-1], path_tree[-1]
+            save_dir = os.path.join(self.out_path, *save_dir)
+            save_name = save_name[:-3] + "lines.txt"
+            save_name = os.path.join(save_dir, save_name)
+            if not os.path.exists(save_dir):
+                os.makedirs(save_dir, exist_ok=True)
+
+            with open(save_name, "w") as f:
+                for l in lane_coords:
+                    for (x, y) in l:
+                        print("{} {}".format(x, y), end=" ", file=f)
+                    print(file=f)
+
+            json_dict = {}
+            json_dict['lanes'] = []
+            json_dict['h_sample'] = []
+            json_dict['raw_file'] = os.path.join(*path_tree[-4:])
+            json_dict['run_time'] = 0
+            for l in lane_coords:
+                if len(l) == 0:
+                    continue
+                json_dict['lanes'].append([])
+                for (x, y) in l:
+                    json_dict['lanes'][-1].append(int(x))
+            for (x, y) in lane_coords[0]:
+                json_dict['h_sample'].append(y)
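+            # One TuSimple-style record per image: per-lane x coordinates plus
+            # the sampled y positions in 'h_sample'; 'run_time' is a
+            # placeholder since inference time is not measured here.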
+            self.dump_to_json.append(json.dumps(json_dict))
+
+            if self.cfg.view:
+                img = cv2.imread(img_path[b])
+                new_img_name = img_name[b].replace('/', '_')
+                save_dir = os.path.join(self.view_dir, new_img_name)
+                dataset.view(img, lane_coords, save_dir)
+
+    def evaluate(self, dataset, output, batch):
+        res = dataset.get_lane(output)
+        self.evaluate_lane(dataset, res, batch)
+
+    def summarize(self):
+        best_acc = 0
+        output_file = os.path.join(self.out_path, 'predict_test.json')
+        with open(output_file, "w+") as f:
+            for line in self.dump_to_json:
+                print(line, end="\n", file=f)
+
+        eval_result, acc = LaneEval.bench_one_submit(output_file,
+                                                     self.cfg.test_json_file)
+
+        self.logger.info(eval_result)
+        self.dump_to_json = []
+        best_acc = max(acc, best_acc)
+        return best_acc
diff --git a/lanedet/runner/optimizer.py b/lanedet/runner/optimizer.py
new file mode 100644
index 0000000..6754f41
--- /dev/null
+++ b/lanedet/runner/optimizer.py
@@ -0,0 +1,25 @@
+import torch
+
+_optimizer_factory = {
+    'adam': torch.optim.Adam,
+    'sgd': torch.optim.SGD
+}
+
+
+def build_optimizer(cfg, net):
+    params = []
+    lr = cfg.optimizer.lr
+    weight_decay = cfg.optimizer.weight_decay
+
+    for key, value in net.named_parameters():
+        if not value.requires_grad:
+            continue
+        params += [{"params": [value], "lr": lr, "weight_decay": weight_decay}]
+
+    if 'adam' in cfg.optimizer.type:
+        optimizer = _optimizer_factory[cfg.optimizer.type](params, lr, weight_decay=weight_decay)
+    else:
+        optimizer = _optimizer_factory[cfg.optimizer.type](
+            params, lr, weight_decay=weight_decay, momentum=cfg.optimizer.momentum)
+
+    return optimizer
diff --git a/lanedet/runner/registry.py b/lanedet/runner/registry.py
new file mode 100644
index 0000000..4b543c7
--- /dev/null
+++ b/lanedet/runner/registry.py
@@ -0,0 +1,19 @@
+import torch.nn as nn
+
+from lanedet.utils import Registry, build_from_cfg
+
+TRAINER = Registry('trainer')
+EVALUATOR = Registry('evaluator')
+
+def build(cfg, registry, default_args=None):
+    if isinstance(cfg, list):
+        modules = [
+            build_from_cfg(cfg_, registry, default_args) for cfg_ in cfg
+        ]
+        return nn.Sequential(*modules)
+    else:
+        return build_from_cfg(cfg, registry, default_args)
+
+def build_trainer(cfg):
+    return build(cfg.trainer, TRAINER, default_args=dict(cfg=cfg))
+
+def build_evaluator(cfg):
+    return build(cfg.evaluator, EVALUATOR, default_args=dict(cfg=cfg))
diff --git a/lanedet/runner/runner.py b/lanedet/runner/runner.py
new file mode 100644
index 0000000..6d9f0e8
--- /dev/null
+++ b/lanedet/runner/runner.py
@@ -0,0 +1,107 @@
+import time
+import torch
+from tqdm import tqdm
+import pytorch_warmup as warmup
+
+from lanedet.models.registry import build_net
+from .registry import build_trainer, build_evaluator
+from .optimizer import build_optimizer
+from .scheduler import build_scheduler
+from lanedet.datasets import build_dataloader
+from .utils.recorder import build_recorder
+from .utils.net_utils import save_model, load_network
+
+
+class Runner(object):
+    def __init__(self, cfg):
+        self.cfg = cfg
+        self.recorder = build_recorder(self.cfg)
+        self.net = build_net(self.cfg)
+        self.net = torch.nn.parallel.DataParallel(
+            self.net, device_ids=range(self.cfg.gpus)).cuda()
+        self.recorder.logger.info('Network: \n' + str(self.net))
+        self.resume()
+        self.optimizer = build_optimizer(self.cfg, self.net)
+        self.scheduler = build_scheduler(self.cfg, self.optimizer)
+        self.evaluator = build_evaluator(self.cfg)
+        self.warmup_scheduler = warmup.LinearWarmup(
+            self.optimizer, warmup_period=5000)
+        self.metric = 0.
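+        # pytorch_warmup's LinearWarmup ramps the learning rate up over the
+        # first warmup_period optimizer steps; train_epoch() calls dampen()
+        # right after scheduler.step() so the two schedules compose.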
+ + def resume(self): + if not self.cfg.load_from and not self.cfg.finetune_from: + return + load_network(self.net, self.cfg.load_from, + finetune_from=self.cfg.finetune_from, logger=self.recorder.logger) + + def to_cuda(self, batch): + for k in batch: + if k == 'meta': + continue + batch[k] = batch[k].cuda() + return batch + + def train_epoch(self, epoch, train_loader): + self.net.train() + end = time.time() + max_iter = len(train_loader) + for i, data in enumerate(train_loader): + if self.recorder.step >= self.cfg.total_iter: + break + date_time = time.time() - end + self.recorder.step += 1 + data = self.to_cuda(data) + output = self.trainer.forward(self.net, data) + self.optimizer.zero_grad() + loss = output['loss'] + loss.backward() + self.optimizer.step() + self.scheduler.step() + self.warmup_scheduler.dampen() + batch_time = time.time() - end + end = time.time() + self.recorder.update_loss_stats(output['loss_stats']) + self.recorder.batch_time.update(batch_time) + self.recorder.data_time.update(date_time) + + if i % self.cfg.log_interval == 0 or i == max_iter - 1: + lr = self.optimizer.param_groups[0]['lr'] + self.recorder.lr = lr + self.recorder.record('train') + + def train(self): + self.net.train() + self.recorder.logger.info('start training...') + self.trainer = build_trainer(self.cfg) + train_loader = build_dataloader(self.cfg.dataset.train, self.cfg, is_train=True) + val_loader = build_dataloader(self.cfg.dataset.val, self.cfg, is_train=False) + + for epoch in range(self.cfg.epochs): + self.recorder.epoch = epoch + self.train_epoch(epoch, train_loader) + if (epoch + 1) % self.cfg.save_ep == 0 or epoch == self.cfg.epochs - 1: + self.save_ckpt() + if (epoch + 1) % self.cfg.eval_ep == 0 or epoch == self.cfg.epochs - 1: + self.validate(val_loader) + if self.recorder.step >= self.cfg.total_iter: + break + + def validate(self, val_loader): + self.net.eval() + for i, data in enumerate(tqdm(val_loader, desc=f'Validate')): + data = self.to_cuda(data) + with torch.no_grad(): + output = self.net(data['img']) + self.evaluator.evaluate(val_loader.dataset, output, data) + + metric = self.evaluator.summarize() + if not metric: + return + if metric > self.metric: + self.metric = metric + self.save_ckpt(is_best=True) + self.recorder.logger.info('Best metric: ' + str(self.metric)) + + def save_ckpt(self, is_best=False): + save_model(self.net, self.optimizer, self.scheduler, + self.recorder, is_best) diff --git a/lanedet/runner/scheduler.py b/lanedet/runner/scheduler.py new file mode 100644 index 0000000..8648582 --- /dev/null +++ b/lanedet/runner/scheduler.py @@ -0,0 +1,21 @@ +import torch +import math + + +_scheduler_factory = { + 'LambdaLR': torch.optim.lr_scheduler.LambdaLR, + 'CosineAnnealingLR': torch.optim.lr_scheduler.CosineAnnealingLR, +} + + +def build_scheduler(cfg, optimizer): + + assert cfg.scheduler.type in _scheduler_factory + + cfg_cp = cfg.scheduler.copy() + cfg_cp.pop('type') + + scheduler = _scheduler_factory[cfg.scheduler.type](optimizer, **cfg_cp) + + + return scheduler diff --git a/lanedet/runner/trainer/__init__.py b/lanedet/runner/trainer/__init__.py new file mode 100644 index 0000000..6acbcd9 --- /dev/null +++ b/lanedet/runner/trainer/__init__.py @@ -0,0 +1,2 @@ +from .resa import RESA +from .lane_cls import LaneCls diff --git a/lanedet/runner/trainer/lane_cls.py b/lanedet/runner/trainer/lane_cls.py new file mode 100644 index 0000000..a1503be --- /dev/null +++ b/lanedet/runner/trainer/lane_cls.py @@ -0,0 +1,28 @@ +import torch.nn as nn +import torch +import 
torch.nn.functional as F + +from .losses.focal_loss import SoftmaxFocalLoss +from lanedet.runner.registry import TRAINER + +@TRAINER.register_module +class LaneCls(nn.Module): + def __init__(self, cfg): + super(LaneCls, self).__init__() + self.cfg = cfg + self.loss = SoftmaxFocalLoss(2) + self.reg_loss = F.smooth_l1_loss + + def forward(self, net, batch): + output = net(batch['img']) + + loss_stats = {} + loss = 0. + + cls_loss = self.loss(output['cls'], batch['cls_label']) + loss += cls_loss + loss_stats.update({'cls_loss': cls_loss}) + + ret = {'loss': loss, 'loss_stats': loss_stats} + + return ret diff --git a/lanedet/runner/trainer/losses/dice_loss.py b/lanedet/runner/trainer/losses/dice_loss.py new file mode 100644 index 0000000..a95b762 --- /dev/null +++ b/lanedet/runner/trainer/losses/dice_loss.py @@ -0,0 +1,15 @@ +import torch +from torch import nn +from torch.autograd import Variable +from torch import einsum +import numpy as np + +def dice_loss(input, target): + input = input.contiguous().view(input.size()[0], -1) + target = target.contiguous().view(target.size()[0], -1).float() + + a = torch.sum(input * target, 1) + b = torch.sum(input * input, 1) + 0.001 + c = torch.sum(target * target, 1) + 0.001 + d = (2 * a) / (b + c) + return (1-d).mean() diff --git a/lanedet/runner/trainer/losses/focal_loss.py b/lanedet/runner/trainer/losses/focal_loss.py new file mode 100644 index 0000000..929f225 --- /dev/null +++ b/lanedet/runner/trainer/losses/focal_loss.py @@ -0,0 +1,17 @@ +import torch +from torch import nn +import torch.nn.functional as F + +class SoftmaxFocalLoss(nn.Module): + def __init__(self, gamma, ignore_lb=255, *args, **kwargs): + super(SoftmaxFocalLoss, self).__init__() + self.gamma = gamma + self.nll = nn.NLLLoss(ignore_index=ignore_lb) + + def forward(self, logits, labels): + scores = F.softmax(logits, dim=1) + factor = torch.pow(1.-scores, self.gamma) + log_score = F.log_softmax(logits, dim=1) + log_score = factor * log_score + loss = self.nll(log_score, labels) + return loss diff --git a/lanedet/runner/trainer/resa.py b/lanedet/runner/trainer/resa.py new file mode 100644 index 0000000..5bd9b0e --- /dev/null +++ b/lanedet/runner/trainer/resa.py @@ -0,0 +1,44 @@ +import torch.nn as nn +import torch +import torch.nn.functional as F + +from lanedet.runner.registry import TRAINER +from .losses.dice_loss import dice_loss +from .losses.focal_loss import SoftmaxFocalLoss + +@TRAINER.register_module +class RESA(nn.Module): + def __init__(self, cfg): + super(RESA, self).__init__() + self.cfg = cfg + weights = torch.ones(cfg.num_classes) + weights[0] = cfg.bg_weight + weights = weights.cuda() + self.criterion = torch.nn.NLLLoss(ignore_index=self.cfg.ignore_label, + weight=weights).cuda() + self.criterion_exist = torch.nn.BCEWithLogitsLoss().cuda() + self.focal_loss = SoftmaxFocalLoss(2.) + + def forward(self, net, batch): + output = net(batch['img']) + + loss_stats = {} + loss = 0. 
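+        # NLLLoss over log-softmax scores is per-pixel cross-entropy with the
+        # background class down-weighted (cfg.bg_weight) and ignore_label
+        # pixels excluded, countering the heavy class imbalance of lane masks.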
+ + seg_loss = self.criterion(F.log_softmax( + output['seg'], dim=1), batch['label'].long()) + # target = F.one_hot(batch['label'], num_classes=self.cfg.num_classes).permute(0, 3, 1, 2) + # seg_loss = dice_loss(F.softmax( + # output['seg'], dim=1)[:, 1:], target[:, 1:]) * self.cfg.seg_loss_weight + loss += seg_loss + loss_stats.update({'seg_loss': seg_loss}) + + if 'exist' in output: + exist_loss = 0.1 * \ + self.criterion_exist(output['exist'], batch['exist'].float()) + loss += exist_loss + loss_stats.update({'exist_loss': exist_loss}) + + ret = {'loss': loss, 'loss_stats': loss_stats} + + return ret diff --git a/lanedet/runner/utils/logger.py b/lanedet/runner/utils/logger.py new file mode 100644 index 0000000..189d353 --- /dev/null +++ b/lanedet/runner/utils/logger.py @@ -0,0 +1,50 @@ +import logging + +logger_initialized = {} + +def get_logger(name, log_file=None, log_level=logging.INFO): + """Initialize and get a logger by name. + If the logger has not been initialized, this method will initialize the + logger by adding one or two handlers, otherwise the initialized logger will + be directly returned. During initialization, a StreamHandler will always be + added. If `log_file` is specified and the process rank is 0, a FileHandler + will also be added. + Args: + name (str): Logger name. + log_file (str | None): The log filename. If specified, a FileHandler + will be added to the logger. + log_level (int): The logger level. Note that only the process of + rank 0 is affected, and other processes will set the level to + "Error" thus be silent most of the time. + Returns: + logging.Logger: The expected logger. + """ + logger = logging.getLogger(name) + if name in logger_initialized: + return logger + # handle hierarchical names + # e.g., logger "a" is initialized, then logger "a.b" will skip the + # initialization since it is a child of "a". 
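+    # e.g. once 'lanedet' is initialized, get_logger('lanedet.sub') returns
+    # the child logger untouched: its records propagate to the parent's
+    # handlers, so no duplicate handlers are attached.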
+ for logger_name in logger_initialized: + if name.startswith(logger_name): + return logger + + stream_handler = logging.StreamHandler() + handlers = [stream_handler] + + if log_file is not None: + file_handler = logging.FileHandler(log_file, 'w') + handlers.append(file_handler) + + formatter = logging.Formatter( + '%(asctime)s - %(name)s - %(levelname)s - %(message)s') + for handler in handlers: + handler.setFormatter(formatter) + handler.setLevel(log_level) + logger.addHandler(handler) + + logger.setLevel(log_level) + + logger_initialized[name] = True + + return logger diff --git a/lanedet/runner/utils/net_utils.py b/lanedet/runner/utils/net_utils.py new file mode 100644 index 0000000..8abed44 --- /dev/null +++ b/lanedet/runner/utils/net_utils.py @@ -0,0 +1,49 @@ +import torch +import os +from torch import nn +import numpy as np +import torch.nn.functional +from termcolor import colored +from .logger import get_logger + +def save_model(net, optim, scheduler, recorder, is_best=False): + model_dir = os.path.join(recorder.work_dir, 'ckpt') + os.system('mkdir -p {}'.format(model_dir)) + epoch = recorder.epoch + ckpt_name = 'best' if is_best else epoch + torch.save({ + 'net': net.state_dict(), + 'optim': optim.state_dict(), + 'scheduler': scheduler.state_dict(), + 'recorder': recorder.state_dict(), + 'epoch': epoch + }, os.path.join(model_dir, '{}.pth'.format(ckpt_name))) + + # remove previous pretrained model if the number of models is too big + # pths = [int(pth.split('.')[0]) for pth in os.listdir(model_dir)] + # if len(pths) <= 2: + # return + # os.system('rm {}'.format(os.path.join(model_dir, '{}.pth'.format(min(pths))))) + + +def load_network_specified(net, model_dir, logger=None): + pretrained_net = torch.load(model_dir)['net'] + net_state = net.state_dict() + state = {} + for k, v in pretrained_net.items(): + if k not in net_state.keys() or v.size() != net_state[k].size(): + if logger: + logger.info('skip weights: ' + k) + continue + state[k] = v + net.load_state_dict(state, strict=False) + + +def load_network(net, model_dir, finetune_from=None, logger=None): + if finetune_from: + if logger: + logger.info('Finetune model from: ' + finetune_from) + load_network_specified(net, finetune_from, logger) + return + pretrained_model = torch.load(model_dir) + net.load_state_dict(pretrained_model['net'], strict=True) diff --git a/lanedet/runner/utils/recorder.py b/lanedet/runner/utils/recorder.py new file mode 100644 index 0000000..e5fc9c6 --- /dev/null +++ b/lanedet/runner/utils/recorder.py @@ -0,0 +1,100 @@ +from collections import deque, defaultdict +import torch +import os +import datetime +from .logger import get_logger + + +class SmoothedValue(object): + """Track a series of values and provide access to smoothed values over a + window or the global series average. 
+ """ + + def __init__(self, window_size=20): + self.deque = deque(maxlen=window_size) + self.total = 0.0 + self.count = 0 + + def update(self, value): + self.deque.append(value) + self.count += 1 + self.total += value + + @property + def median(self): + d = torch.tensor(list(self.deque)) + return d.median().item() + + @property + def avg(self): + d = torch.tensor(list(self.deque)) + return d.mean().item() + + @property + def global_avg(self): + return self.total / self.count + + +class Recorder(object): + def __init__(self, cfg): + self.cfg = cfg + self.work_dir = self.get_work_dir() + cfg.work_dir = self.work_dir + self.log_path = os.path.join(self.work_dir, 'log.txt') + + self.logger = get_logger('lanedet', self.log_path) + self.logger.info('Config: \n' + cfg.text) + + # scalars + self.epoch = 0 + self.step = 0 + self.loss_stats = defaultdict(SmoothedValue) + self.batch_time = SmoothedValue() + self.data_time = SmoothedValue() + self.max_iter = self.cfg.total_iter + self.lr = 0. + + def get_work_dir(self): + now = datetime.datetime.now().strftime('%Y%m%d_%H%M%S') + hyper_param_str = '_lr_%1.0e_b_%d' % (self.cfg.optimizer.lr, self.cfg.batch_size) + work_dir = os.path.join(self.cfg.work_dirs, now + hyper_param_str) + if not os.path.exists(work_dir): + os.makedirs(work_dir) + return work_dir + + def update_loss_stats(self, loss_dict): + for k, v in loss_dict.items(): + self.loss_stats[k].update(v.detach().cpu()) + + def record(self, prefix, step=-1, loss_stats=None, image_stats=None): + self.logger.info(self) + # self.write(str(self)) + + def write(self, content): + with open(self.log_path, 'a+') as f: + f.write(content) + f.write('\n') + + def state_dict(self): + scalar_dict = {} + scalar_dict['step'] = self.step + return scalar_dict + + def load_state_dict(self, scalar_dict): + self.step = scalar_dict['step'] + + def __str__(self): + loss_state = [] + for k, v in self.loss_stats.items(): + loss_state.append('{}: {:.4f}'.format(k, v.avg)) + loss_state = ' '.join(loss_state) + + recording_state = ' '.join(['epoch: {}', 'step: {}', 'lr: {:.4f}', '{}', 'data: {:.4f}', 'batch: {:.4f}', 'eta: {}']) + eta_seconds = self.batch_time.global_avg * (self.max_iter - self.step) + eta_string = str(datetime.timedelta(seconds=int(eta_seconds))) + return recording_state.format(self.epoch, self.step, self.lr, loss_state, self.data_time.avg, self.batch_time.avg, eta_string) + + +def build_recorder(cfg): + return Recorder(cfg) + diff --git a/lanedet/utils/__init__.py b/lanedet/utils/__init__.py new file mode 100644 index 0000000..eb99ab0 --- /dev/null +++ b/lanedet/utils/__init__.py @@ -0,0 +1,2 @@ +from .config import Config +from .registry import Registry, build_from_cfg diff --git a/lanedet/utils/config.py b/lanedet/utils/config.py new file mode 100755 index 0000000..42a0ff2 --- /dev/null +++ b/lanedet/utils/config.py @@ -0,0 +1,417 @@ +# Copyright (c) Open-MMLab. All rights reserved. 
+import ast +import os.path as osp +import shutil +import sys +import tempfile +from argparse import Action, ArgumentParser +from collections import abc +from importlib import import_module + +from addict import Dict +from yapf.yapflib.yapf_api import FormatCode + + +BASE_KEY = '_base_' +DELETE_KEY = '_delete_' +RESERVED_KEYS = ['filename', 'text', 'pretty_text'] + +def check_file_exist(filename, msg_tmpl='file "{}" does not exist'): + if not osp.isfile(filename): + raise FileNotFoundError(msg_tmpl.format(filename)) + + + +class ConfigDict(Dict): + + def __missing__(self, name): + raise KeyError(name) + + def __getattr__(self, name): + try: + value = super(ConfigDict, self).__getattr__(name) + except KeyError: + ex = AttributeError(f"'{self.__class__.__name__}' object has no " + f"attribute '{name}'") + except Exception as e: + ex = e + else: + return value + raise ex + + +def add_args(parser, cfg, prefix=''): + for k, v in cfg.items(): + if isinstance(v, str): + parser.add_argument('--' + prefix + k) + elif isinstance(v, int): + parser.add_argument('--' + prefix + k, type=int) + elif isinstance(v, float): + parser.add_argument('--' + prefix + k, type=float) + elif isinstance(v, bool): + parser.add_argument('--' + prefix + k, action='store_true') + elif isinstance(v, dict): + add_args(parser, v, prefix + k + '.') + elif isinstance(v, abc.Iterable): + parser.add_argument('--' + prefix + k, type=type(v[0]), nargs='+') + else: + print(f'cannot parse key {prefix + k} of type {type(v)}') + return parser + + +class Config: + """A facility for config and config files. + It supports common file formats as configs: python/json/yaml. The interface + is the same as a dict object and also allows access config values as + attributes. + Example: + >>> cfg = Config(dict(a=1, b=dict(b1=[0, 1]))) + >>> cfg.a + 1 + >>> cfg.b + {'b1': [0, 1]} + >>> cfg.b.b1 + [0, 1] + >>> cfg = Config.fromfile('tests/data/config/a.py') + >>> cfg.filename + "/home/kchen/projects/mmcv/tests/data/config/a.py" + >>> cfg.item4 + 'test' + >>> cfg + "Config [path: /home/kchen/projects/mmcv/tests/data/config/a.py]: " + "{'item1': [1, 2], 'item2': {'a': 0}, 'item3': True, 'item4': 'test'}" + """ + + @staticmethod + def _validate_py_syntax(filename): + with open(filename) as f: + content = f.read() + try: + ast.parse(content) + except SyntaxError: + raise SyntaxError('There are syntax errors in config ' + f'file {filename}') + + @staticmethod + def _file2dict(filename): + filename = osp.abspath(osp.expanduser(filename)) + check_file_exist(filename) + if filename.endswith('.py'): + with tempfile.TemporaryDirectory() as temp_config_dir: + temp_config_file = tempfile.NamedTemporaryFile( + dir=temp_config_dir, suffix='.py') + temp_config_name = osp.basename(temp_config_file.name) + shutil.copyfile(filename, + osp.join(temp_config_dir, temp_config_name)) + temp_module_name = osp.splitext(temp_config_name)[0] + sys.path.insert(0, temp_config_dir) + Config._validate_py_syntax(filename) + mod = import_module(temp_module_name) + sys.path.pop(0) + cfg_dict = { + name: value + for name, value in mod.__dict__.items() + if not name.startswith('__') + } + # delete imported module + del sys.modules[temp_module_name] + # close temp file + temp_config_file.close() + elif filename.endswith(('.yml', '.yaml', '.json')): + import mmcv + cfg_dict = mmcv.load(filename) + else: + raise IOError('Only py/yml/yaml/json type are supported now!') + + cfg_text = filename + '\n' + with open(filename, 'r') as f: + cfg_text += f.read() + + if BASE_KEY in cfg_dict: + 
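# `_base_` may list several parent config files: each base is loaded
+            # recursively, bases are merged left to right (duplicate top-level
+            # keys are rejected), then the child dict is merged on top so its
+            # values override the bases.
+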
cfg_dir = osp.dirname(filename) + base_filename = cfg_dict.pop(BASE_KEY) + base_filename = base_filename if isinstance( + base_filename, list) else [base_filename] + + cfg_dict_list = list() + cfg_text_list = list() + for f in base_filename: + _cfg_dict, _cfg_text = Config._file2dict(osp.join(cfg_dir, f)) + cfg_dict_list.append(_cfg_dict) + cfg_text_list.append(_cfg_text) + + base_cfg_dict = dict() + for c in cfg_dict_list: + if len(base_cfg_dict.keys() & c.keys()) > 0: + raise KeyError('Duplicate key is not allowed among bases') + base_cfg_dict.update(c) + + base_cfg_dict = Config._merge_a_into_b(cfg_dict, base_cfg_dict) + cfg_dict = base_cfg_dict + + # merge cfg_text + cfg_text_list.append(cfg_text) + cfg_text = '\n'.join(cfg_text_list) + + return cfg_dict, cfg_text + + @staticmethod + def _merge_a_into_b(a, b): + # merge dict `a` into dict `b` (non-inplace). values in `a` will + # overwrite `b`. + # copy first to avoid inplace modification + b = b.copy() + for k, v in a.items(): + if isinstance(v, dict) and k in b and not v.pop(DELETE_KEY, False): + if not isinstance(b[k], dict): + raise TypeError( + f'{k}={v} in child config cannot inherit from base ' + f'because {k} is a dict in the child config but is of ' + f'type {type(b[k])} in base config. You may set ' + f'`{DELETE_KEY}=True` to ignore the base config') + b[k] = Config._merge_a_into_b(v, b[k]) + else: + b[k] = v + return b + + @staticmethod + def fromfile(filename): + cfg_dict, cfg_text = Config._file2dict(filename) + return Config(cfg_dict, cfg_text=cfg_text, filename=filename) + + @staticmethod + def auto_argparser(description=None): + """Generate argparser from config file automatically (experimental) + """ + partial_parser = ArgumentParser(description=description) + partial_parser.add_argument('config', help='config file path') + cfg_file = partial_parser.parse_known_args()[0].config + cfg = Config.fromfile(cfg_file) + parser = ArgumentParser(description=description) + parser.add_argument('config', help='config file path') + add_args(parser, cfg) + return parser, cfg + + def __init__(self, cfg_dict=None, cfg_text=None, filename=None): + if cfg_dict is None: + cfg_dict = dict() + elif not isinstance(cfg_dict, dict): + raise TypeError('cfg_dict must be a dict, but ' + f'got {type(cfg_dict)}') + for key in cfg_dict: + if key in RESERVED_KEYS: + raise KeyError(f'{key} is reserved for config file') + + super(Config, self).__setattr__('_cfg_dict', ConfigDict(cfg_dict)) + super(Config, self).__setattr__('_filename', filename) + if cfg_text: + text = cfg_text + elif filename: + with open(filename, 'r') as f: + text = f.read() + else: + text = '' + super(Config, self).__setattr__('_text', text) + + @property + def filename(self): + return self._filename + + @property + def text(self): + return self._text + + @property + def pretty_text(self): + + indent = 4 + + def _indent(s_, num_spaces): + s = s_.split('\n') + if len(s) == 1: + return s_ + first = s.pop(0) + s = [(num_spaces * ' ') + line for line in s] + s = '\n'.join(s) + s = first + '\n' + s + return s + + def _format_basic_types(k, v, use_mapping=False): + if isinstance(v, str): + v_str = f"'{v}'" + else: + v_str = str(v) + + if use_mapping: + k_str = f"'{k}'" if isinstance(k, str) else str(k) + attr_str = f'{k_str}: {v_str}' + else: + attr_str = f'{str(k)}={v_str}' + attr_str = _indent(attr_str, indent) + + return attr_str + + def _format_list(k, v, use_mapping=False): + # check if all items in the list are dict + if all(isinstance(_, dict) for _ in v): + v_str = '[\n' + v_str 
+= '\n'.join( + f'dict({_indent(_format_dict(v_), indent)}),' + for v_ in v).rstrip(',') + if use_mapping: + k_str = f"'{k}'" if isinstance(k, str) else str(k) + attr_str = f'{k_str}: {v_str}' + else: + attr_str = f'{str(k)}={v_str}' + attr_str = _indent(attr_str, indent) + ']' + else: + attr_str = _format_basic_types(k, v, use_mapping) + return attr_str + + def _contain_invalid_identifier(dict_str): + contain_invalid_identifier = False + for key_name in dict_str: + contain_invalid_identifier |= \ + (not str(key_name).isidentifier()) + return contain_invalid_identifier + + def _format_dict(input_dict, outest_level=False): + r = '' + s = [] + + use_mapping = _contain_invalid_identifier(input_dict) + if use_mapping: + r += '{' + for idx, (k, v) in enumerate(input_dict.items()): + is_last = idx >= len(input_dict) - 1 + end = '' if outest_level or is_last else ',' + if isinstance(v, dict): + v_str = '\n' + _format_dict(v) + if use_mapping: + k_str = f"'{k}'" if isinstance(k, str) else str(k) + attr_str = f'{k_str}: dict({v_str}' + else: + attr_str = f'{str(k)}=dict({v_str}' + attr_str = _indent(attr_str, indent) + ')' + end + elif isinstance(v, list): + attr_str = _format_list(k, v, use_mapping) + end + else: + attr_str = _format_basic_types(k, v, use_mapping) + end + + s.append(attr_str) + r += '\n'.join(s) + if use_mapping: + r += '}' + return r + + cfg_dict = self._cfg_dict.to_dict() + text = _format_dict(cfg_dict, outest_level=True) + # copied from setup.cfg + yapf_style = dict( + based_on_style='pep8', + blank_line_before_nested_class_or_def=True, + split_before_expression_after_opening_paren=True) + text, _ = FormatCode(text, style_config=yapf_style, verify=True) + + return text + + def __repr__(self): + return f'Config (path: {self.filename}): {self._cfg_dict.__repr__()}' + + def __len__(self): + return len(self._cfg_dict) + + def __getattr__(self, name): + return getattr(self._cfg_dict, name) + + def __getitem__(self, name): + return self._cfg_dict.__getitem__(name) + + def __setattr__(self, name, value): + if isinstance(value, dict): + value = ConfigDict(value) + self._cfg_dict.__setattr__(name, value) + + def __setitem__(self, name, value): + if isinstance(value, dict): + value = ConfigDict(value) + self._cfg_dict.__setitem__(name, value) + + def __iter__(self): + return iter(self._cfg_dict) + + def dump(self, file=None): + cfg_dict = super(Config, self).__getattribute__('_cfg_dict').to_dict() + if self.filename.endswith('.py'): + if file is None: + return self.pretty_text + else: + with open(file, 'w') as f: + f.write(self.pretty_text) + else: + import mmcv + if file is None: + file_format = self.filename.split('.')[-1] + return mmcv.dump(cfg_dict, file_format=file_format) + else: + mmcv.dump(cfg_dict, file) + + def merge_from_dict(self, options): + """Merge list into cfg_dict + Merge the dict parsed by MultipleKVAction into this cfg. + Examples: + >>> options = {'model.backbone.depth': 50, + ... 'model.backbone.with_cp':True} + >>> cfg = Config(dict(model=dict(backbone=dict(type='ResNet')))) + >>> cfg.merge_from_dict(options) + >>> cfg_dict = super(Config, self).__getattribute__('_cfg_dict') + >>> assert cfg_dict == dict( + ... model=dict(backbone=dict(depth=50, with_cp=True))) + Args: + options (dict): dict of configs to merge from. 
+ """ + option_cfg_dict = {} + for full_key, v in options.items(): + d = option_cfg_dict + key_list = full_key.split('.') + for subkey in key_list[:-1]: + d.setdefault(subkey, ConfigDict()) + d = d[subkey] + subkey = key_list[-1] + d[subkey] = v + + cfg_dict = super(Config, self).__getattribute__('_cfg_dict') + super(Config, self).__setattr__( + '_cfg_dict', Config._merge_a_into_b(option_cfg_dict, cfg_dict)) + + +class DictAction(Action): + """ + argparse action to split an argument into KEY=VALUE form + on the first = and append to a dictionary. List options should + be passed as comma separated values, i.e KEY=V1,V2,V3 + """ + + @staticmethod + def _parse_int_float_bool(val): + try: + return int(val) + except ValueError: + pass + try: + return float(val) + except ValueError: + pass + if val.lower() in ['true', 'false']: + return True if val.lower() == 'true' else False + return val + + def __call__(self, parser, namespace, values, option_string=None): + options = {} + for kv in values: + key, val = kv.split('=', maxsplit=1) + val = [self._parse_int_float_bool(v) for v in val.split(',')] + if len(val) == 1: + val = val[0] + options[key] = val + setattr(namespace, self.dest, options) diff --git a/lanedet/utils/mytransforms.py b/lanedet/utils/mytransforms.py new file mode 100644 index 0000000..1b86f63 --- /dev/null +++ b/lanedet/utils/mytransforms.py @@ -0,0 +1,167 @@ +import numbers +import random +import numpy as np +from PIL import Image, ImageOps, ImageFilter +#from config import cfg +import torch +import pdb +import cv2 + +# ===============================img tranforms============================ + +class Compose2(object): + def __init__(self, transforms): + self.transforms = transforms + + def __call__(self, img, mask, bbx=None): + if bbx is None: + for t in self.transforms: + img, mask = t(img, mask) + return img, mask + for t in self.transforms: + img, mask, bbx = t(img, mask, bbx) + return img, mask, bbx + +class FreeScale(object): + def __init__(self, size): + self.size = size # (h, w) + + def __call__(self, img, mask): + return img.resize((self.size[1], self.size[0]), Image.BILINEAR), mask.resize((self.size[1], self.size[0]), Image.NEAREST) + +class FreeScaleMask(object): + def __init__(self,size): + self.size = size + def __call__(self,mask): + return mask.resize((self.size[1], self.size[0]), Image.NEAREST) + +class Scale(object): + def __init__(self, size): + self.size = size + + def __call__(self, img, mask): + if img.size != mask.size: + print(img.size) + print(mask.size) + assert img.size == mask.size + w, h = img.size + if (w <= h and w == self.size) or (h <= w and h == self.size): + return img, mask + if w < h: + ow = self.size + oh = int(self.size * h / w) + return img.resize((ow, oh), Image.BILINEAR), mask.resize((ow, oh), Image.NEAREST) + else: + oh = self.size + ow = int(self.size * w / h) + return img.resize((ow, oh), Image.BILINEAR), mask.resize((ow, oh), Image.NEAREST) + + +class RandomRotate(object): + """Crops the given PIL.Image at a random location to have a region of + the given size. 
size can be a tuple (target_height, target_width) + or an integer, in which case the target will be of a square shape (size, size) + """ + + def __init__(self, angle): + self.angle = angle + + def __call__(self, image, label): + assert label is None or image.size == label.size + + + angle = random.randint(0, self.angle * 2) - self.angle + + label = label.rotate(angle, resample=Image.NEAREST) + image = image.rotate(angle, resample=Image.BILINEAR) + + return image, label + + + +# ===============================label tranforms============================ + +class DeNormalize(object): + def __init__(self, mean, std): + self.mean = mean + self.std = std + + def __call__(self, tensor): + for t, m, s in zip(tensor, self.mean, self.std): + t.mul_(s).add_(m) + return tensor + + +class MaskToTensor(object): + def __call__(self, img): + return torch.from_numpy(np.array(img, dtype=np.int32)).long() + + +def find_start_pos(row_sample,start_line): + # row_sample = row_sample.sort() + # for i,r in enumerate(row_sample): + # if r >= start_line: + # return i + l,r = 0,len(row_sample)-1 + while True: + mid = int((l+r)/2) + if r - l == 1: + return r + if row_sample[mid] < start_line: + l = mid + if row_sample[mid] > start_line: + r = mid + if row_sample[mid] == start_line: + return mid + +class RandomLROffsetLABEL(object): + def __init__(self,max_offset): + self.max_offset = max_offset + def __call__(self,img,label): + offset = np.random.randint(-self.max_offset,self.max_offset) + w, h = img.size + + img = np.array(img) + if offset > 0: + img[:,offset:,:] = img[:,0:w-offset,:] + img[:,:offset,:] = 0 + if offset < 0: + real_offset = -offset + img[:,0:w-real_offset,:] = img[:,real_offset:,:] + img[:,w-real_offset:,:] = 0 + + label = np.array(label) + if offset > 0: + label[:,offset:] = label[:,0:w-offset] + label[:,:offset] = 0 + if offset < 0: + offset = -offset + label[:,0:w-offset] = label[:,offset:] + label[:,w-offset:] = 0 + return Image.fromarray(img),Image.fromarray(label) + +class RandomUDoffsetLABEL(object): + def __init__(self,max_offset): + self.max_offset = max_offset + def __call__(self,img,label): + offset = np.random.randint(-self.max_offset,self.max_offset) + w, h = img.size + + img = np.array(img) + if offset > 0: + img[offset:,:,:] = img[0:h-offset,:,:] + img[:offset,:,:] = 0 + if offset < 0: + real_offset = -offset + img[0:h-real_offset,:,:] = img[real_offset:,:,:] + img[h-real_offset:,:,:] = 0 + + label = np.array(label) + if offset > 0: + label[offset:,:] = label[0:h-offset,:] + label[:offset,:] = 0 + if offset < 0: + offset = -offset + label[0:h-offset,:] = label[offset:,:] + label[h-offset:,:] = 0 + return Image.fromarray(img),Image.fromarray(label) diff --git a/lanedet/utils/registry.py b/lanedet/utils/registry.py new file mode 100644 index 0000000..4b9f478 --- /dev/null +++ b/lanedet/utils/registry.py @@ -0,0 +1,81 @@ +import inspect + +import six + +# borrow from mmdetection + +def is_str(x): + """Whether the input is an string instance.""" + return isinstance(x, six.string_types) + +class Registry(object): + + def __init__(self, name): + self._name = name + self._module_dict = dict() + + def __repr__(self): + format_str = self.__class__.__name__ + '(name={}, items={})'.format( + self._name, list(self._module_dict.keys())) + return format_str + + @property + def name(self): + return self._name + + @property + def module_dict(self): + return self._module_dict + + def get(self, key): + return self._module_dict.get(key, None) + + def _register_module(self, module_class): + """Register a 
module. + + Args: + module (:obj:`nn.Module`): Module to be registered. + """ + if not inspect.isclass(module_class): + raise TypeError('module must be a class, but got {}'.format( + type(module_class))) + module_name = module_class.__name__ + if module_name in self._module_dict: + raise KeyError('{} is already registered in {}'.format( + module_name, self.name)) + self._module_dict[module_name] = module_class + + def register_module(self, cls): + self._register_module(cls) + return cls + + +def build_from_cfg(cfg, registry, default_args=None): + """Build a module from config dict. + + Args: + cfg (dict): Config dict. It should at least contain the key "type". + registry (:obj:`Registry`): The registry to search the type from. + default_args (dict, optional): Default initialization arguments. + + Returns: + obj: The constructed object. + """ + assert isinstance(cfg, dict) and 'type' in cfg + assert isinstance(default_args, dict) or default_args is None + args = cfg.copy() + obj_type = args.pop('type') + if is_str(obj_type): + obj_cls = registry.get(obj_type) + if obj_cls is None: + raise KeyError('{} is not in the {} registry'.format( + obj_type, registry.name)) + elif inspect.isclass(obj_type): + obj_cls = obj_type + else: + raise TypeError('type must be a str or valid type, but got {}'.format( + type(obj_type))) + if default_args is not None: + for name, value in default_args.items(): + args.setdefault(name, value) + return obj_cls(**args) diff --git a/lanedet/utils/transforms.py b/lanedet/utils/transforms.py new file mode 100644 index 0000000..c915716 --- /dev/null +++ b/lanedet/utils/transforms.py @@ -0,0 +1,358 @@ +import random +import cv2 +import numpy as np +import numbers +import collections + +__all__ = ['GroupRandomCrop', 'GroupCenterCrop', 'GroupRandomPad', 'GroupCenterPad', + 'GroupRandomScale', 'GroupRandomHorizontalFlip', 'GroupNormalize'] + + +class SampleResize(object): + def __init__(self, size): + assert (isinstance(size, collections.Iterable) and len(size) == 2) + self.size = size + + def __call__(self, sample): + out = list() + out.append(cv2.resize(sample[0], self.size, + interpolation=cv2.INTER_CUBIC)) + out.append(cv2.resize(sample[1], self.size, + interpolation=cv2.INTER_NEAREST)) + return out + + +class GroupRandomCrop(object): + def __init__(self, size): + if isinstance(size, numbers.Number): + self.size = (int(size), int(size)) + else: + self.size = size + + def __call__(self, img_group): + h, w = img_group[0].shape[0:2] + th, tw = self.size + + out_images = list() + h1 = random.randint(0, max(0, h - th)) + w1 = random.randint(0, max(0, w - tw)) + h2 = min(h1 + th, h) + w2 = min(w1 + tw, w) + + for img in img_group: + assert (img.shape[0] == h and img.shape[1] == w) + out_images.append(img[h1:h2, w1:w2, ...]) + return out_images + + +# class GroupRandomCropRatio(object): +# def __init__(self, size): +# if isinstance(size, numbers.Number): +# self.size = (int(size), int(size)) +# else: +# self.size = size + +# def __call__(self, img_group): +# h, w = img_group[0].shape[0:2] +# tw, th = self.size + +# out_images = list() +# h1 = random.randint(0, max(0, h - th)) +# w1 = random.randint(0, max(0, w - tw)) +# h2 = min(h1 + th, h) +# w2 = min(w1 + tw, w) + +# for img in img_group: +# assert (img.shape[0] == h and img.shape[1] == w) +# out_images.append(img[h1:h2, w1:w2, ...]) +# return out_images + + +class GroupCenterCrop(object): + def __init__(self, size): + if isinstance(size, numbers.Number): + self.size = (int(size), int(size)) + else: + self.size = size + + def 
__call__(self, img_group): + h, w = img_group[0].shape[0:2] + th, tw = self.size + + out_images = list() + h1 = max(0, int((h - th) / 2)) + w1 = max(0, int((w - tw) / 2)) + h2 = min(h1 + th, h) + w2 = min(w1 + tw, w) + + for img in img_group: + assert (img.shape[0] == h and img.shape[1] == w) + out_images.append(img[h1:h2, w1:w2, ...]) + return out_images + + +class GroupRandomPad(object): + def __init__(self, size, padding): + if isinstance(size, numbers.Number): + self.size = (int(size), int(size)) + else: + self.size = size + self.padding = padding + + def __call__(self, img_group): + assert (len(self.padding) == len(img_group)) + h, w = img_group[0].shape[0:2] + th, tw = self.size + + out_images = list() + h1 = random.randint(0, max(0, th - h)) + w1 = random.randint(0, max(0, tw - w)) + h2 = max(th - h - h1, 0) + w2 = max(tw - w - w1, 0) + + for img, padding in zip(img_group, self.padding): + assert (img.shape[0] == h and img.shape[1] == w) + out_images.append(cv2.copyMakeBorder( + img, h1, h2, w1, w2, cv2.BORDER_CONSTANT, value=padding)) + if len(img.shape) > len(out_images[-1].shape): + out_images[-1] = out_images[-1][..., + np.newaxis] # single channel image + return out_images + + +class GroupCenterPad(object): + def __init__(self, size, padding): + if isinstance(size, numbers.Number): + self.size = (int(size), int(size)) + else: + self.size = size + self.padding = padding + + def __call__(self, img_group): + assert (len(self.padding) == len(img_group)) + h, w = img_group[0].shape[0:2] + th, tw = self.size + + out_images = list() + h1 = max(0, int((th - h) / 2)) + w1 = max(0, int((tw - w) / 2)) + h2 = max(th - h - h1, 0) + w2 = max(tw - w - w1, 0) + + for img, padding in zip(img_group, self.padding): + assert (img.shape[0] == h and img.shape[1] == w) + out_images.append(cv2.copyMakeBorder( + img, h1, h2, w1, w2, cv2.BORDER_CONSTANT, value=padding)) + if len(img.shape) > len(out_images[-1].shape): + out_images[-1] = out_images[-1][..., + np.newaxis] # single channel image + return out_images + + +class GroupConcerPad(object): + def __init__(self, size, padding): + if isinstance(size, numbers.Number): + self.size = (int(size), int(size)) + else: + self.size = size + self.padding = padding + + def __call__(self, img_group): + assert (len(self.padding) == len(img_group)) + h, w = img_group[0].shape[0:2] + th, tw = self.size + + out_images = list() + h1 = 0 + w1 = 0 + h2 = max(th - h - h1, 0) + w2 = max(tw - w - w1, 0) + + for img, padding in zip(img_group, self.padding): + assert (img.shape[0] == h and img.shape[1] == w) + out_images.append(cv2.copyMakeBorder( + img, h1, h2, w1, w2, cv2.BORDER_CONSTANT, value=padding)) + if len(img.shape) > len(out_images[-1].shape): + out_images[-1] = out_images[-1][..., + np.newaxis] # single channel image + return out_images + + +class GroupRandomScaleNew(object): + def __init__(self, size=(976, 208), interpolation=(cv2.INTER_LINEAR, cv2.INTER_NEAREST)): + self.size = size + self.interpolation = interpolation + + def __call__(self, img_group): + assert (len(self.interpolation) == len(img_group)) + scale_w, scale_h = self.size[0] * 1.0 / 1640, self.size[1] * 1.0 / 590 + out_images = list() + for img, interpolation in zip(img_group, self.interpolation): + out_images.append(cv2.resize(img, None, fx=scale_w, + fy=scale_h, interpolation=interpolation)) + if len(img.shape) > len(out_images[-1].shape): + out_images[-1] = out_images[-1][..., + np.newaxis] # single channel image + return out_images + + +class GroupRandomScale(object): + def __init__(self, 
size=(0.5, 1.5), interpolation=(cv2.INTER_LINEAR, cv2.INTER_NEAREST)): + self.size = size + self.interpolation = interpolation + + def __call__(self, img_group): + assert (len(self.interpolation) == len(img_group)) + scale = random.uniform(self.size[0], self.size[1]) + out_images = list() + for img, interpolation in zip(img_group, self.interpolation): + out_images.append(cv2.resize(img, None, fx=scale, + fy=scale, interpolation=interpolation)) + if len(img.shape) > len(out_images[-1].shape): + out_images[-1] = out_images[-1][..., + np.newaxis] # single channel image + return out_images + + +class GroupRandomMultiScale(object): + def __init__(self, size=(0.5, 1.5), interpolation=(cv2.INTER_LINEAR, cv2.INTER_NEAREST)): + self.size = size + self.interpolation = interpolation + + def __call__(self, img_group): + assert (len(self.interpolation) == len(img_group)) + scales = [0.5, 1.0, 1.5] # random.uniform(self.size[0], self.size[1]) + out_images = list() + for scale in scales: + for img, interpolation in zip(img_group, self.interpolation): + out_images.append(cv2.resize( + img, None, fx=scale, fy=scale, interpolation=interpolation)) + if len(img.shape) > len(out_images[-1].shape): + out_images[-1] = out_images[-1][..., + np.newaxis] # single channel image + return out_images + + +class GroupRandomScaleRatio(object): + def __init__(self, size=(680, 762, 562, 592), interpolation=(cv2.INTER_LINEAR, cv2.INTER_NEAREST)): + self.size = size + self.interpolation = interpolation + self.origin_id = [0, 1360, 580, 768, 255, 300, 680, 710, 312, 1509, 800, 1377, 880, 910, 1188, 128, 960, 1784, + 1414, 1150, 512, 1162, 950, 750, 1575, 708, 2111, 1848, 1071, 1204, 892, 639, 2040, 1524, 832, 1122, 1224, 2295] + + def __call__(self, img_group): + assert (len(self.interpolation) == len(img_group)) + w_scale = random.randint(self.size[0], self.size[1]) + h_scale = random.randint(self.size[2], self.size[3]) + h, w, _ = img_group[0].shape + out_images = list() + out_images.append(cv2.resize(img_group[0], None, fx=w_scale*1.0/w, fy=h_scale*1.0/h, + interpolation=self.interpolation[0])) # fx=w_scale*1.0/w, fy=h_scale*1.0/h + ### process label map ### + origin_label = cv2.resize( + img_group[1], None, fx=w_scale*1.0/w, fy=h_scale*1.0/h, interpolation=self.interpolation[1]) + origin_label = origin_label.astype(int) + label = origin_label[:, :, 0] * 5 + \ + origin_label[:, :, 1] * 3 + origin_label[:, :, 2] + new_label = np.ones(label.shape) * 100 + new_label = new_label.astype(int) + for cnt in range(37): + new_label = ( + label == self.origin_id[cnt]) * (cnt - 100) + new_label + new_label = (label == self.origin_id[37]) * (36 - 100) + new_label + assert(100 not in np.unique(new_label)) + out_images.append(new_label) + return out_images + + +class GroupRandomRotation(object): + def __init__(self, degree=(-10, 10), interpolation=(cv2.INTER_LINEAR, cv2.INTER_NEAREST), padding=None): + self.degree = degree + self.interpolation = interpolation + self.padding = padding + if self.padding is None: + self.padding = [0, 0] + + def __call__(self, img_group): + assert (len(self.interpolation) == len(img_group)) + v = random.random() + if v < 0.5: + degree = random.uniform(self.degree[0], self.degree[1]) + h, w = img_group[0].shape[0:2] + center = (w / 2, h / 2) + map_matrix = cv2.getRotationMatrix2D(center, degree, 1.0) + out_images = list() + for img, interpolation, padding in zip(img_group, self.interpolation, self.padding): + out_images.append(cv2.warpAffine( + img, map_matrix, (w, h), flags=interpolation, 
borderMode=cv2.BORDER_CONSTANT, borderValue=padding)) + if len(img.shape) > len(out_images[-1].shape): + out_images[-1] = out_images[-1][..., + np.newaxis] # single channel image + return out_images + else: + return img_group + + +class GroupRandomBlur(object): + def __init__(self, applied): + self.applied = applied + + def __call__(self, img_group): + assert (len(self.applied) == len(img_group)) + v = random.random() + if v < 0.5: + out_images = [] + for img, a in zip(img_group, self.applied): + if a: + img = cv2.GaussianBlur( + img, (5, 5), random.uniform(1e-6, 0.6)) + out_images.append(img) + if len(img.shape) > len(out_images[-1].shape): + out_images[-1] = out_images[-1][..., + np.newaxis] # single channel image + return out_images + else: + return img_group + + +class GroupRandomHorizontalFlip(object): + """Randomly horizontally flips the given numpy Image with a probability of 0.5 + """ + + def __init__(self, is_flow=False): + self.is_flow = is_flow + + def __call__(self, img_group, is_flow=False): + v = random.random() + if v < 0.5: + out_images = [np.fliplr(img) for img in img_group] + if self.is_flow: + for i in range(0, len(out_images), 2): + # invert flow pixel values when flipping + out_images[i] = -out_images[i] + return out_images + else: + return img_group + + +class GroupNormalize(object): + def __init__(self, mean, std): + self.mean = mean + self.std = std + + def __call__(self, img_group): + out_images = list() + for img, m, s in zip(img_group, self.mean, self.std): + if len(m) == 1: + img = img - np.array(m) # single channel image + img = img / np.array(s) + else: + img = img - np.array(m)[np.newaxis, np.newaxis, ...] + img = img / np.array(s)[np.newaxis, np.newaxis, ...] + out_images.append(img) + + # cv2.imshow('img', (out_images[0] + np.array(self.mean[0])[np.newaxis, np.newaxis, ...]).astype(np.uint8)) + # cv2.imshow('label', (out_images[1] * 100).astype(np.uint8)) + # print(np.unique(out_images[1])) + # cv2.waitKey() + return out_images diff --git a/main.py b/main.py new file mode 100644 index 0000000..729d9b0 --- /dev/null +++ b/main.py @@ -0,0 +1,63 @@ +import os +import torch +import torch.nn.parallel +import torch.backends.cudnn as cudnn +import argparse +from lanedet.utils.config import Config +from lanedet.runner.runner import Runner +from lanedet.datasets import build_dataloader + + +def main(): + args = parse_args() + os.environ["CUDA_VISIBLE_DEVICES"] = ','.join(str(gpu) for gpu in args.gpus) + + cfg = Config.fromfile(args.config) + cfg.gpus = len(args.gpus) + + cfg.load_from = args.load_from + cfg.finetune_from = args.finetune_from + cfg.view = args.view + + cfg.work_dirs = args.work_dirs + '/' + cfg.evaluator.type + + cudnn.benchmark = True + cudnn.fastest = True + + runner = Runner(cfg) + + if args.validate: + val_loader = build_dataloader(cfg.dataset.val, cfg, is_train=False) + runner.validate(val_loader) + else: + runner.train() + +def parse_args(): + parser = argparse.ArgumentParser(description='Train a detector') + parser.add_argument('config', help='train config file path') + parser.add_argument( + '--work_dirs', type=str, default='work_dirs', + help='work dirs') + parser.add_argument( + '--load_from', default=None, + help='the checkpoint file to resume from') + parser.add_argument( + '--finetune_from', default=None, + help='whether to finetune from the checkpoint') + parser.add_argument( + '--view', action='store_true', + help='whether to view') + parser.add_argument( + '--validate', + action='store_true', + help='whether to evaluate the checkpoint 
during training')
+    parser.add_argument('--gpus', nargs='+', type=int, default=[0])
+    parser.add_argument('--seed', type=int,
+                        default=None, help='random seed')
+    args = parser.parse_args()
+
+    return args
+
+
+if __name__ == '__main__':
+    main()
diff --git a/requirement.txt b/requirement.txt
new file mode 100644
index 0000000..68d8722
--- /dev/null
+++ b/requirement.txt
@@ -0,0 +1,9 @@
+torch==1.1.0
+torchvision==0.2.0
+pandas
+addict
+scikit-learn
+opencv-python
+pytorch_warmup
+scikit-image
+tqdm
diff --git a/tools/generate_seg_tusimple.py b/tools/generate_seg_tusimple.py
new file mode 100644
index 0000000..cf8273d
--- /dev/null
+++ b/tools/generate_seg_tusimple.py
@@ -0,0 +1,105 @@
+import json
+import numpy as np
+import cv2
+import os
+import argparse
+
+TRAIN_SET = ['label_data_0313.json', 'label_data_0601.json']
+VAL_SET = ['label_data_0531.json']
+TRAIN_VAL_SET = TRAIN_SET + VAL_SET
+TEST_SET = ['test_label.json']
+
+def gen_label_for_json(args, image_set):
+    H, W = 720, 1280
+    SEG_WIDTH = 30
+    save_dir = args.savedir
+
+    os.makedirs(os.path.join(args.root, args.savedir, "list"), exist_ok=True)
+    list_f = open(os.path.join(args.root, args.savedir, "list", "{}_gt.txt".format(image_set)), "w")
+
+    json_path = os.path.join(args.root, args.savedir, "{}.json".format(image_set))
+    with open(json_path) as f:
+        for line in f:
+            label = json.loads(line)
+            # ---------- clean and sort lanes -------------
+            lanes = []
+            _lanes = []
+            slope = []  # identify 0th, 1st, 2nd, 3rd, 4th, 5th lane through slope
+            for i in range(len(label['lanes'])):
+                l = [(x, y) for x, y in zip(label['lanes'][i], label['h_samples']) if x >= 0]
+                if (len(l) > 1):
+                    _lanes.append(l)
+                    slope.append(np.arctan2(l[-1][1] - l[0][1], l[0][0] - l[-1][0]) / np.pi * 180)
+            _lanes = [_lanes[i] for i in np.argsort(slope)]
+            slope = [slope[i] for i in np.argsort(slope)]
+
+            idx = [None for i in range(6)]
+            for i in range(len(slope)):
+                if slope[i] <= 90:
+                    idx[2] = i
+                    idx[1] = i - 1 if i > 0 else None
+                    idx[0] = i - 2 if i > 1 else None
+                else:
+                    idx[3] = i
+                    idx[4] = i + 1 if i + 1 < len(slope) else None
+                    idx[5] = i + 2 if i + 2 < len(slope) else None
+                    break
+            for i in range(6):
+                lanes.append([] if idx[i] is None else _lanes[idx[i]])
+
+            # ---------------------------------------------
+
+            img_path = label['raw_file']
+            seg_img = np.zeros((H, W, 3))
+            list_str = []  # str to be written to list.txt
+            for i in range(len(lanes)):
+                coords = lanes[i]
+                if len(coords) < 4:
+                    list_str.append('0')
+                    continue
+                for j in range(len(coords) - 1):
+                    cv2.line(seg_img, coords[j], coords[j + 1], (i + 1, i + 1, i + 1), SEG_WIDTH // 2)
+                list_str.append('1')
+
+            seg_path = img_path.split("/")
+            seg_path, img_name = os.path.join(args.root, args.savedir, seg_path[1], seg_path[2]), seg_path[3]
+            os.makedirs(seg_path, exist_ok=True)
+            seg_path = os.path.join(seg_path, img_name[:-3] + "png")
+            cv2.imwrite(seg_path, seg_img)
+
+            seg_path = "/".join([args.savedir, *img_path.split("/")[1:3], img_name[:-3] + "png"])
+            if seg_path[0] != '/':
+                seg_path = '/' + seg_path
+            if img_path[0] != '/':
+                img_path = '/' + img_path
+            list_str.insert(0, seg_path)
+            list_str.insert(0, img_path)
+            list_str = " ".join(list_str) + "\n"
+            list_f.write(list_str)
+
+
+def generate_json_file(args, save_dir, json_file, image_set):
+    with open(os.path.join(save_dir, json_file), "w") as outfile:
+        for json_name in image_set:
+            with open(os.path.join(args.root, json_name)) as infile:
+                for line in infile:
+                    outfile.write(line)
+
+def generate_label(args):
+    save_dir = os.path.join(args.root, args.savedir)
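+    # Merge the split's TuSimple JSON annotation files first, then rasterize
+    # each lane as a thick polyline into a per-image mask and write a list
+    # file of "image mask flag1 .. flag6" entries for the dataloader.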
+    os.makedirs(save_dir, exist_ok=True)
+    generate_json_file(args, save_dir, "train_val.json", TRAIN_VAL_SET)
+    generate_json_file(args, save_dir, "test.json", TEST_SET)
+
+    print("generating train_val set...")
+    gen_label_for_json(args, 'train_val')
+    print("generating test set...")
+    gen_label_for_json(args, 'test')
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--root', required=True, help='The root of the TuSimple dataset')
+    parser.add_argument('--savedir', type=str, default='seg_label', help='The directory under --root where the generated labels are saved')
+    args = parser.parse_args()
+
+    generate_label(args)
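+
+# Example usage (assuming the standard TuSimple layout, i.e. the
+# label_data_*.json files and the clips/ directory under --root):
+#   python tools/generate_seg_tusimple.py --root /path/to/tusimple --savedir seg_label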