tensorflow · mjyun01 · Jul 24, 2023 · Jul 27, 2023 · Jul 27, 2023 · Jul 31, 2023
@@ -0,0 +1,56 @@
+
+# Road Network Graph Detection by Transformer
+
+[![RNGDet](https://img.shields.io/badge/RNGDet-arXiv.2202.07824-B3181B?)](https://arxiv.org/abs/2202.07824)
+[![RNGDet++](https://img.shields.io/badge/RNGDet++-arXiv.2209.10150-B3181B?)](https://arxiv.org/abs/2209.10150)
+
+## Environment setup
+The code can be run on multiple GPUs or TPUs with different distribution
+strategies. See the TensorFlow distributed training
+[guide](https://www.tensorflow.org/guide/distributed_training) for an overview
+of `tf.distribute`.
+
+## Data preparation
+To download the dataset and generate labels, try the following command:
+
+```
+cd data
+./prepare_dataset.bash
+```
+
+To generate training samples, try the following command:
+
+```
+python create_cityscale_tf_record.py \
+    --dataroot ./dataset/ \
+    --roi_size 128 \
+    --image_size 2048 \
+    --edge_move_ahead_length 30 \
+    --num_queries 10 \
+    --noise 8 \
+    --max_num_frame 10000 \
+    --num_shards 32
+```
+## Training 
+To change the training options of RNGDet, edit the following command in `do_train.sh`:
+
+```
+CUDA_VISIBLE_DEVICES=4 python3 train.py \
+  --mode=train \
+  --experiment=rngdet_cityscale  \
+  --model_dir=./CKPT_DIR_NAME \
+  --config_file=./configs/experiments/cityscale_rngdet_r50_gpu.yaml
+```
+
+To start training, try the following command:
+
+```
+sh do_train.sh 
+```
+
+## Evaluation 
+To evaluate one image with internal step visualization, try the following command:
+
+```
+python run_rngdet_basic.py -ckpt ./CKPT_DIR_NAME
+```
+
@@ -0,0 +1,9 @@
+# Runtime overrides: mirrored distribution strategy on one GPU, float32 precision.
+runtime:
+  distribution_strategy: 'mirrored'
+  mixed_precision_dtype: 'float32'
+  num_gpus: 1
+task:
+  # Both input pipelines use the same dtype as mixed_precision_dtype above.
+  train_data:
+    dtype: 'float32'
+  validation_data:
+    dtype: 'float32'
@@ -0,0 +1,8 @@
+# Runtime overrides: TPU distribution strategy, float32 precision.
+runtime:
+  distribution_strategy: 'tpu'
+  mixed_precision_dtype: 'float32'
+task:
+  # Both input pipelines use the same dtype as mixed_precision_dtype above.
+  train_data:
+    dtype: 'float32'
+  validation_data:
+    dtype: 'float32'
@@ -0,0 +1,223 @@
+# Copyright 2023 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""RNGDet configurations."""
+
+import dataclasses
+import os
+from typing import List, Optional, Union
+
+from official.core import config_definitions as cfg
+from official.core import exp_factory
+from official.modeling import hyperparams
+from official.modeling import optimization
+from official.vision.configs import common
+from official.vision.configs import decoders
+from official.vision.configs import backbones
+
+
+@dataclasses.dataclass
+class DataConfig(cfg.DataConfig):
+  """Input config for training."""
+  input_path: str = ''
+  tfds_name: str = ''
+  tfds_split: str = 'train'
+  global_batch_size: int = 0
+  is_training: bool = False
+  dtype: str = 'float32'
+  decoder: common.DataDecoder = dataclasses.field(default_factory=common.DataDecoder)
+  shuffle_buffer_size: int = 10000
+  file_type: str = 'tfrecord'
+  drop_remainder: bool = True
+
+
+@dataclasses.dataclass
+class Losses(hyperparams.Config):
+  lambda_cls: float = 1.0
+  lambda_box: float = 5.0
+  lambda_ins: float = 1.0
+  background_cls_weight: float = 0.2
+
+@dataclasses.dataclass
+class Rngdet(hyperparams.Config):
+  """Rngdet model definations."""
+  num_queries: int = 10
+  hidden_size: int = 256
+  num_classes: int = 2  # 0: vertices, 1: background
+  num_encoder_layers: int = 6
+  num_decoder_layers: int = 6 
+  input_size: List[int] = dataclasses.field(default_factory=list)
+  roi_size: int = 128
+  backbone: backbones.Backbone = dataclasses.field(default_factory=lambda:backbones.Backbone(
+      type='resnet', resnet=backbones.ResNet(model_id=50, bn_trainable=False)))
+  decoder: decoders.Decoder = dataclasses.field(
+      default_factory=lambda: decoders.Decoder(type='fpn', fpn=decoders.FPN())
+  )
+  min_level: int = 2
+  max_level: int = 5
+  norm_activation: common.NormActivation = dataclasses.field(default_factory=common.NormActivation)
+  backbone_endpoint_name: str = '5'
+
+
+@dataclasses.dataclass
+class RngdetTask(cfg.TaskConfig):
+  model: Rngdet = dataclasses.field(default_factory=Rngdet)
+  train_data: cfg.DataConfig = dataclasses.field(default_factory=cfg.DataConfig)
+  validation_data: cfg.DataConfig = dataclasses.field(default_factory=cfg.DataConfig)
+  losses: Losses = dataclasses.field(default_factory=Losses)
+  init_checkpoint: Optional[str] = None
+  init_checkpoint_modules: Union[str, List[str]] = 'all'  # all, backbone
+  per_category_metrics: bool = False
+
+
+# Number of training examples; the experiment factories divide it by the train
+# batch size to get steps per epoch.
+CITYSCALE_TRAIN_EXAMPLES = 420140
+# Base directory of the input files. Read from the DATAPATH environment
+# variable, falling back to the hard-coded default path.
+datapath = os.getenv("DATAPATH", "/data2/cityscale/tfrecord/")
+CITYSCALE_INPUT_PATH_BASE = datapath 
+# Number of validation examples; the experiment factories divide it by the eval
+# batch size to get validation_steps.
+CITYSCALE_VAL_EXAMPLES = 5000
+
+@exp_factory.register_config_factory('rngdet_cityscale')
+def rngdet_cityscale() -> cfg.ExperimentConfig:
+  """Config to get results that matches the paper."""
+  train_batch_size = 32
+  eval_batch_size = 16
+  steps_per_epoch = CITYSCALE_TRAIN_EXAMPLES // train_batch_size
+  train_steps = 50 * steps_per_epoch  # 50 epochs
+  config = cfg.ExperimentConfig(
+      task=RngdetTask(
+          init_checkpoint='gs://ghpark-imagenet-tfrecord/ckpt/resnet50_imagenet',
+          init_checkpoint_modules='backbone',
+          model=Rngdet(
+              input_size=[128, 128, 3],
+              roi_size=128,
+              norm_activation=common.NormActivation()),
+          losses=Losses(),
+          train_data=DataConfig(
+              input_path=os.path.join(CITYSCALE_INPUT_PATH_BASE, 'train-noise*'),
+              is_training=True,
+              global_batch_size=train_batch_size,
+              shuffle_buffer_size=1000,
+          ),
+          validation_data=DataConfig(
+              input_path=os.path.join(CITYSCALE_INPUT_PATH_BASE, 'train-noise*'),
+              is_training=False,
+              global_batch_size=eval_batch_size,
+              drop_remainder=False,
+          )),
+      trainer=cfg.TrainerConfig(
+          train_steps=train_steps,
+          validation_steps=CITYSCALE_VAL_EXAMPLES // eval_batch_size,
+          steps_per_loop=steps_per_epoch,
+          summary_interval=steps_per_epoch,
+          checkpoint_interval=1*steps_per_epoch,
+          validation_interval=1*steps_per_epoch,
+          max_to_keep=1,
+          best_checkpoint_export_subdir='best_ckpt',
+          best_checkpoint_eval_metric='AP',
+          optimizer_config=optimization.OptimizationConfig({
+              'optimizer': {
+                  'type': 'adamw_experimental',
+                  'adamw_experimental': {
+                      'epsilon': 1.0e-08,
+                      'weight_decay': 1.0e-05,
+                      'global_clipnorm': -1.0,
+                  },
+              },
+              'learning_rate': {
+                  'type': 'polynomial',
+                  'polynomial': {
+                      'initial_learning_rate': 0.0001,
+                      'end_learning_rate': 0.000001,
+                      'offset': 0,
+                      'power': 1.0,
+                      'decay_steps': 10 * steps_per_epoch,
+                  },
+              },
+              'warmup': {
+                  'type': 'linear',
+                  'linear': {
+                      'warmup_steps': 2 * steps_per_epoch,
+                      'warmup_learning_rate': 0,
+                  },
+              },
+          })),
+      restrictions=[
+          'task.train_data.is_training != None',
+      ])
+  return config
+
+
+
+@exp_factory.register_config_factory('rngdet_cityscale_detr')
+def rngdet_cityscale() -> cfg.ExperimentConfig:
+  """Config to get results that matches the paper."""
+  train_batch_size = 32
+  eval_batch_size = 16
+  steps_per_epoch = CITYSCALE_TRAIN_EXAMPLES // train_batch_size
+  train_steps = 50 * steps_per_epoch  # 50 epochs
+  config = cfg.ExperimentConfig(
+      task=RngdetTask(
+          init_checkpoint='gs://ghpark-imagenet-tfrecord/ckpt/resnet50_imagenet',
+          init_checkpoint_modules='backbone',
+          model=Rngdet(
+              input_size=[128, 128, 3],
+              roi_size=128,
+              norm_activation=common.NormActivation()),
+          losses=Losses(),
+          train_data=DataConfig(
+              input_path=os.path.join(CITYSCALE_INPUT_PATH_BASE, 'train-noise*'),
+              is_training=True,
+              global_batch_size=train_batch_size,
+              shuffle_buffer_size=1000,
+          ),
+          validation_data=DataConfig(
+              input_path=os.path.join(CITYSCALE_INPUT_PATH_BASE, 'train_noise*'),
+              is_training=False,
+              global_batch_size=eval_batch_size,
+              drop_remainder=False,
+          )),
+      trainer=cfg.TrainerConfig(
+          train_steps=train_steps,
+          validation_steps=CITYSCALE_VAL_EXAMPLES // eval_batch_size,
+          steps_per_loop=steps_per_epoch,
+          summary_interval=steps_per_epoch,
+          checkpoint_interval=1*steps_per_epoch,
+          validation_interval=1*steps_per_epoch,
+          max_to_keep=1,
+          best_checkpoint_export_subdir='best_ckpt',
+          best_checkpoint_eval_metric='AP',
+          optimizer_config=optimization.OptimizationConfig({
+              'optimizer': {
+                  'type': 'adamw',
+                  'adamw': {
+                      'weight_decay_rate': 1e-5,
+                      'epsilon': 1e-08,
+                      'global_clipnorm': 0.1,
+                      # Avoid AdamW legacy behavior.
+                      'gradient_clip_norm': 0.0
+                  }
+              },
+              'learning_rate': {
+                  'type': 'stepwise',
+                  'stepwise': {
+                      'boundaries': [20 * steps_per_epoch,
+                                     30 * steps_per_epoch,
+                                     40 * steps_per_epoch],
+                      'values': [1.0e-05, 1.0e-05, 1.0e-06, 1.0e-07]
+                  }
+              },
+          })),
+      restrictions=[
+          'task.train_data.is_training != None',
+      ])
+  return config
@@ -0,0 +1,85 @@
+# Copyright 2023 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Tests for official.projects.rngdet.modeling.rngdet."""
+import tensorflow as tf
+from official.projects.rngdet.modeling import rngdet
+from official.vision.modeling.backbones import resnet
+from official.vision.modeling.decoders import fpn
+
+
+class DetrTest(tf.test.TestCase):
+
+  def test_forward(self):
+    num_queries = 10
+    hidden_size = 128
+    num_classes = 2
+    image_size = 128
+    input_size = [image_size,image_size,3]
+    batch_size = 64
+
+    backbone = resnet.ResNet(50, bn_trainable=False)
+    backbone_endpoint_name = '5'
+    history_specs = tf.keras.layers.InputSpec(
+        shape=[None] + input_size[:2] + [3])
+    backbone_history = resnet.ResNet(50,
+                                     input_specs=history_specs,
+                                     bn_trainable=False)
+    segment_fpn = fpn.FPN(backbone.output_specs,
+                           min_level=2,
+                           max_level=5)
+    keypoint_fpn = fpn.FPN(backbone.output_specs,
+                           min_level=2,
+                           max_level=5)
+
+    transformer = rngdet.DETRTransformer(
+        hidden_size= hidden_size,
+        num_encoder_layers=6,
+        num_decoder_layers=6)
+
+    multi_scale = rngdet.MultiScale( 
+        transformer, 
+        dim=transformer._hidden_size, 
+        nheads=transformer._num_heads, 
+        fpn_dims= [2048, 1024, 512, 256], 
+        output_size = 128  )
+
+    model = rngdet.RNGDet(backbone,
+                      backbone_history,
+                      backbone_endpoint_name,
+                      segment_fpn,
+                      keypoint_fpn,
+                      transformer,
+                      multi_scale,
+                      num_queries,
+                      hidden_size,
+                      num_classes  ) 
+
+    test_input = tf.ones((batch_size, image_size, image_size, 3))
+    test_history = tf.ones((batch_size, image_size, image_size, 1))
+    outs = model(test_input, test_history, training=True)
+
+    self.assertLen(outs, 3)  # intermediate decoded outputs.
+
+    self.assertAllEqual(
+        tf.shape(outs[0]['cls_outputs']), (batch_size, num_queries, num_classes))
+    self.assertAllEqual(
+        tf.shape(outs[0]['box_outputs']), (batch_size, num_queries, num_classes))
+    self.assertAllEqual(
+        tf.shape(outs[1]), (batch_size, hidden_size, hidden_size, 1))
+    self.assertAllEqual(
+        tf.shape(outs[2]), (batch_size, hidden_size, hidden_size, 1))
+
+# Run the tests through the TensorFlow test runner when executed as a script.
+if __name__ == '__main__':
+  tf.test.main()