Skip to content

Commit a3c9766

Browse files
committed
chore(base): updates demo and weights in codebase.
* eval and submit: new save zip file name and output the command directly * dataloader: add num_frames into dataloader for afterward using. * demo data & weights: update all weights for preprocess_v2 data where ego pose center in sensor center. * config: update monitor metric to new one for all mode config.
1 parent b1d1938 commit a3c9766

23 files changed

Lines changed: 178 additions & 75 deletions

1_train.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -32,14 +32,14 @@ def main(cfg):
3232
raise ValueError("Please specify the self-supervised loss items for seflowLoss.")
3333
pl.seed_everything(cfg.seed, workers=True)
3434

35-
train_dataset = HDF5Dataset(cfg.train_data, dufo=(cfg.loss_fn == 'seflowLoss'))
35+
train_dataset = HDF5Dataset(cfg.train_data, n_frames=cfg.num_frames, dufo=(cfg.loss_fn == 'seflowLoss'))
3636
train_loader = DataLoader(train_dataset,
3737
batch_size=cfg.batch_size,
3838
shuffle=True,
3939
num_workers=cfg.num_workers,
4040
collate_fn=collate_fn_pad,
4141
pin_memory=True)
42-
val_loader = DataLoader(HDF5Dataset(cfg.val_data),
42+
val_loader = DataLoader(HDF5Dataset(cfg.val_data, n_frames=cfg.num_frames),
4343
batch_size=cfg.batch_size,
4444
shuffle=False,
4545
num_workers=cfg.num_workers,
@@ -52,8 +52,8 @@ def main(cfg):
5252

5353
# only for logging on folder name.
5454
if cfg.loss_fn == 'seflowLoss':
55+
cfg.output = cfg.output.replace(cfg.model.name, "seflow")
5556
method_name = "seflow"
56-
cfg.output = cfg.output.replace("deflow", "seflow")
5757
else:
5858
method_name = cfg.model.name
5959
output_dir = HydraConfig.get().runtime.output_dir + f"/{cfg.output}"

2_eval.py

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -27,10 +27,12 @@ def main(cfg):
2727
if not os.path.exists(cfg.checkpoint):
2828
print(f"Checkpoint {cfg.checkpoint} does not exist. Need checkpoints for evaluation.")
2929
sys.exit(1)
30-
31-
checkpoint_params = DictConfig(torch.load(cfg.checkpoint)["hyper_parameters"])
32-
cfg.output = checkpoint_params.cfg.output + f"-{cfg.av2_mode}-v{cfg.leaderboard_version}"
30+
31+
torch_load_ckpt = torch.load(cfg.checkpoint)
32+
checkpoint_params = DictConfig(torch_load_ckpt["hyper_parameters"])
33+
cfg.output = checkpoint_params.cfg.output + f"-e{torch_load_ckpt['epoch']}-{cfg.av2_mode}-v{cfg.leaderboard_version}"
3334
cfg.model.update(checkpoint_params.cfg.model)
35+
3436
mymodel = ModelWrapper.load_from_checkpoint(cfg.checkpoint, cfg=cfg, eval=True)
3537
print(f"\n---LOG[eval]: Loaded model from {cfg.checkpoint}. The model is {checkpoint_params.cfg.model.name}.\n")
3638

@@ -43,7 +45,7 @@ def main(cfg):
4345
trainer = pl.Trainer(logger=wandb_logger, devices=1)
4446
# NOTE(Qingwen): search & check: def eval_only_step_(self, batch, res_dict)
4547
trainer.validate(model = mymodel, \
46-
dataloaders = DataLoader(HDF5Dataset(cfg.dataset_path + f"/{cfg.av2_mode}", eval=True, leaderboard_version=cfg.leaderboard_version), batch_size=1, shuffle=False))
48+
dataloaders = DataLoader(HDF5Dataset(cfg.dataset_path + f"/{cfg.av2_mode}", n_frames=checkpoint_params.cfg.num_frames, eval=True, leaderboard_version=cfg.leaderboard_version), batch_size=1, shuffle=False))
4749
wandb.finish()
4850

4951
if __name__ == "__main__":

3_vis.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,7 @@ def main(cfg):
4848
trainer = pl.Trainer(logger=wandb_logger, devices=1)
4949
# NOTE(Qingwen): search & check in pl_model.py : def test_step(self, batch, res_dict)
5050
trainer.test(model = mymodel, \
51-
dataloaders = DataLoader(HDF5Dataset(cfg.dataset_path), batch_size=1, shuffle=False))
51+
dataloaders = DataLoader(HDF5Dataset(cfg.dataset_path, n_frames=checkpoint_params.cfg.num_frames), batch_size=1, shuffle=False))
5252
wandb.finish()
5353

5454
if __name__ == "__main__":

README.md

Lines changed: 30 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -6,9 +6,9 @@ SeFlow: A Self-Supervised Scene Flow Method in Autonomous Driving
66
[poster coming soon]
77
[video coming soon]
88

9-
2024/07/09 16:34: I'm working on updating code here now. **Not fully ready yet** until Jul'15.
9+
2024/07/16 17:18: Most of the code is already uploaded and tested. You can try training directly by downloading the demo data. The processing script will be ready when the paper is published.
1010

11-
Pre-trained weights for models are available in [Zenodo](https://zenodo.org/records/12632962) link. Check usage in [2. Evaluation](#2-evaluation) or [3. Visualization](#3-visualization).
11+
Pre-trained weights for models are available in [Zenodo](https://zenodo.org/records/12751363) link. Check usage in [2. Evaluation](#2-evaluation) or [3. Visualization](#3-visualization).
1212

1313
Task: __Self-Supervised__ Scene Flow Estimation in Autonomous Driving. No human-label needed. Real-time inference (15-20Hz in RTX3090).
1414

@@ -32,9 +32,10 @@ You can try following methods in our code without any effort to make your own be
3232
- [x] [SeFlow](https://arxiv.org/abs/2407.01702) (Ours 🚀): ECCV 2024
3333
- [x] [DeFlow](https://arxiv.org/abs/2401.16122) (Ours 🚀): ICRA 2024
3434
- [x] [FastFlow3d](https://arxiv.org/abs/2103.01306): RA-L 2021
35-
- [x] [ZeroFlow](https://arxiv.org/abs/2305.10424): ICLR 2024, their pre-trained weight can covert into our format easily through [the script](TODO).
35+
- [x] [ZeroFlow](https://arxiv.org/abs/2305.10424): ICLR 2024, their pre-trained weights can be converted into our format easily through [the script](tests/zerof2ours.py).
3636
- [ ] [NSFP](https://arxiv.org/abs/2111.01253): NeurIPS 2021, faster 3x than original version because of [our CUDA speed up](assets/cuda/README.md), same (slightly better) performance. Done coding, public after review.
3737
- [ ] [FastNSF](https://arxiv.org/abs/2304.09121): ICCV 2023. Done coding, public after review.
38+
<!-- - [ ] [Flow4D](https://arxiv.org/abs/2407.07995): 1st supervise network in the new leaderboard. Done coding, public after review. -->
3839
- [ ] ... more on the way
3940

4041
</details>
@@ -51,7 +52,7 @@ cd SeFlow && mamba env create -f environment.yaml
5152
CUDA package (need install nvcc compiler), the compile time is around 1-5 minutes:
5253
```bash
5354
mamba activate seflow
54-
# change it if you use different cuda version (I tested 11.3, 11.4, 11.7 all works)
55+
# change it if you use different cuda version (I tested 11.3, 11.4, 11.7, 11.8 all works)
5556
export PATH=/usr/local/cuda-11.7/bin:$PATH
5657
export LD_LIBRARY_PATH=/usr/local/cuda-11.7/lib64:$LD_LIBRARY_PATH
5758

@@ -71,25 +72,23 @@ docker run -it --gpus all -v /dev/shm:/dev/shm -v /home/kin/data:/home/kin/data
7172

7273
## 1. Run & Train
7374

74-
Note: Prepare raw data and process train data only needed run once for the task. No need repeat the data process steps till you delete all data.
75+
Note: Preparing the raw data and processing the training data only needs to be done once for the task. There is no need to repeat the data-processing steps unless you delete all the data. We use [wandb](https://wandb.ai/) to log the training process, and you may want to change all `entity="kth-rpl"` to your own entity.
7576

7677
### Data Preparation
7778

78-
Check [dataprocess/README.md](dataprocess/README.md#argoverse-20) for downloading tips for the raw Argoverse 2 dataset.
79+
Check [dataprocess/README.md](dataprocess/README.md#argoverse-20) for downloading tips for the raw Argoverse 2 dataset. Or, if you only want the **mini processed dataset** to try the code quickly, we directly provide one scene inside `train` and `val`. It is already converted to `.h5` format and processed with the label data.
80+
You can download it from [Zenodo](https://zenodo.org/record/12751363) and extract it to the data folder.
7981

80-
Maybe you only want to have the mini processed dataset to try the code quickly, We directly provide one scene inside `train` and `val`. It already converted to `.h5` format and processed with the label data.
81-
<!-- You can download it from [Zenodo](https://zenodo.org/record/12632962) and extract it to the data folder. -->
8282
```bash
83-
# TODO: update the link later when the data is ready
84-
# wget https://zenodo.org/record/12632962/files/demo_data.zip
83+
wget https://zenodo.org/record/12751363/files/demo_data.zip
8584
unzip demo_data.zip -d /home/kin/data/av2
8685
```
8786

8887
#### Prepare raw data
8988

9089
Extract all data to the unified h5 format. [Runtime: Normally takes about 10 minutes to finish running the following commands on my desktop, and about 45 minutes on the cluster I used.]
9190
```bash
92-
python dataprocess/extract_av2.py --av2_type sensor --data_mode train --argo_dir /home/kin/data/av2 --output_dir /home/kin/data/av2/preprocess
91+
python dataprocess/extract_av2.py --av2_type sensor --data_mode train --argo_dir /home/kin/data/av2 --output_dir /home/kin/data/av2/preprocess_v2
9392
python dataprocess/extract_av2.py --av2_type sensor --data_mode val --mask_dir /home/kin/data/av2/3d_scene_flow
9493
python dataprocess/extract_av2.py --av2_type sensor --data_mode test --mask_dir /home/kin/data/av2/3d_scene_flow
9594
```
@@ -99,17 +98,19 @@ python dataprocess/extract_av2.py --av2_type sensor --data_mode test --mask_dir
9998
Process train data for self-supervised learning. Only training data needs this step. [Runtime: Normally need 15 hours for my desktop, 3 hours for the cluster with five available nodes parallel running.]
10099

101100
```bash
102-
python 0_process.py --data_dir /home/kin/data/av2/preprocess/sensor/train --scene_range 0,701
101+
python 0_process.py --data_dir /home/kin/data/av2/preprocess_v2/sensor/train --scene_range 0,701
103102
```
104103

105104
### Train the model
106105

107-
Train SeFlow needed to specify the loss function, we set the config of our best model in the leaderboard. [Runtime: Around 18 hours in 4x A100 GPUs.]
106+
Training SeFlow requires specifying the loss function; we set the config of our best model in the leaderboard. [Runtime: Around 11 hours on 4x A100 GPUs.]
108107

109108
```bash
110-
python 1_train.py model=deflow lr=2e-4 epochs=20 batch_size=16 loss_fn=seflowLoss "add_seloss={chamfer_dis: 1.0, static_flow_loss: 1.0, dynamic_chamfer_dis: 1.0, cluster_based_pc0pc1: 1.0}" "model.target.num_iters=2" "model.val_monitor=val/Dynamic/Mean"
109+
python 1_train.py model=deflow lr=2e-4 epochs=9 batch_size=16 loss_fn=seflowLoss "add_seloss={chamfer_dis: 1.0, static_flow_loss: 1.0, dynamic_chamfer_dis: 1.0, cluster_based_pc0pc1: 1.0}" "model.target.num_iters=2" "model.val_monitor=val/Dynamic/Mean"
111110
```
112111

112+
Or you can directly download the pre-trained weight from [Zenodo](https://zenodo.org/records/12751363/files/seflow_best.ckpt) and skip the training step.
113+
113114
### Other Benchmark Models
114115

115116
You can also train the supervised baseline model in our paper with the following command. [Runtime: Around 10 hours in 4x A100 GPUs.]
@@ -129,27 +130,36 @@ Since in training, we save all hyper-parameters and model checkpoints, the only
129130

130131
```bash
131132
# downloaded pre-trained weight, or train by yourself
132-
wget https://zenodo.org/records/12632962/files/seflow_official.ckpt
133+
wget https://zenodo.org/records/12751363/files/seflow_best.ckpt
133134

134135
# it will directly print all metrics
135-
python 2_eval.py checkpoint=/home/kin/seflow_official.ckpt av2_mode=val
136+
python 2_eval.py checkpoint=/home/kin/seflow_best.ckpt av2_mode=val
136137

137138
# it will output the av2_submit.zip or av2_submit_v2.zip for you to submit to leaderboard
138-
python 2_eval.py checkpoint=/home/kin/seflow_official.ckpt av2_mode=test leaderboard_version=1
139-
python 2_eval.py checkpoint=/home/kin/seflow_official.ckpt av2_mode=test leaderboard_version=2
139+
python 2_eval.py checkpoint=/home/kin/seflow_best.ckpt av2_mode=test leaderboard_version=1
140+
python 2_eval.py checkpoint=/home/kin/seflow_best.ckpt av2_mode=test leaderboard_version=2
140141
```
141142

142143

143144
## 3. Visualization
144145

145-
We provide a script to visualize the results of the model. You can specify the checkpoint path and the data path to visualize the results. The step is quickly similar to evaluation.
146+
We also provide a script to visualize the results of the model. You can specify the checkpoint path and the data path to visualize the results. The steps are similar to evaluation.
146147

147148
```bash
149+
python 3_vis.py checkpoint=/home/kin/seflow_best.ckpt dataset_path=/home/kin/data/av2/preprocess_v2/sensor/vis
150+
151+
# The output of above command will be like:
152+
Model: DeFlow, Checkpoint from: /home/kin/model_zoo/v2/seflow_best.ckpt
153+
We already write the flow_est into the dataset; please run the following command to visualize the flow. Copy and paste it to your terminal:
154+
python tests/scene_flow.py --flow_mode 'seflow_best' --data_dir /home/kin/data/av2/preprocess_v2/sensor/vis
155+
Enjoy! ^v^ ------
148156

149157
# Then run the command in the terminal:
150-
python tests/scene_flow.py --flow_mode 'seflow_official' --data_dir /home/kin/data/av2/preprocess/sensor/mini
158+
python tests/scene_flow.py --flow_mode 'seflow_best' --data_dir /home/kin/data/av2/preprocess_v2/sensor/vis
151159
```
152160

161+
https://github.com/user-attachments/assets/f031d1a2-2d2f-4947-a01f-834ed1c146e6
162+
153163

154164
## Cite & Acknowledgements
155165

assets/slurm/0_process.sh

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -7,29 +7,29 @@
77
#SBATCH -t 1-00:00:00
88
#SBATCH --mail-type=END,FAIL
99
#SBATCH --mail-user=qingwen@kth.se
10-
#SBATCH --output /proj/berzelius-2023-154/users/x_qinzh/seflow/logs/slurm/%J_data.out
11-
#SBATCH --error /proj/berzelius-2023-154/users/x_qinzh/seflow/logs/slurm/%J_data.err
10+
#SBATCH --output /proj/berzelius-2023-154/users/x_qinzh/workspace/SeFlow/logs/slurm/%J_data.out
11+
#SBATCH --error /proj/berzelius-2023-154/users/x_qinzh/workspace/SeFlow/logs/slurm/%J_data.err
1212

13-
cd /proj/berzelius-2023-154/users/x_qinzh/seflow
13+
cd /proj/berzelius-2023-154/users/x_qinzh/workspace/SeFlow
1414
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/proj/berzelius-2023-154/users/x_qinzh/mambaforge/lib
1515
# export HYDRA_FULL_ERROR=1
1616

17-
/proj/berzelius-2023-154/users/x_qinzh/mambaforge/envs/seflow/bin/python dataprocess/extract_av2.py \
17+
/proj/berzelius-2023-154/users/x_qinzh/mambaforge/envs/dataprocess/bin/python dataprocess/extract_av2.py --nproc 64 \
1818
--av2_type sensor \
1919
--data_mode train \
2020
--argo_dir /proj/berzelius-2023-154/users/x_qinzh/av2 \
21-
--output_dir /proj/berzelius-2023-154/users/x_qinzh/av2/preprocess_v2
21+
--output_dir /proj/berzelius-2023-364/users/x_qinzh/data/av2/preprocess_v2
2222

23-
/proj/berzelius-2023-154/users/x_qinzh/mambaforge/envs/seflow/bin/python dataprocess/extract_av2.py \
23+
/proj/berzelius-2023-154/users/x_qinzh/mambaforge/envs/dataprocess/bin/python dataprocess/extract_av2.py --nproc 64 \
2424
--av2_type sensor \
2525
--data_mode val \
2626
--argo_dir /proj/berzelius-2023-154/users/x_qinzh/av2 \
27-
--output_dir /proj/berzelius-2023-154/users/x_qinzh/av2/preprocess_v2 \
27+
--output_dir /proj/berzelius-2023-364/users/x_qinzh/data/av2/preprocess_v2 \
2828
--mask_dir /proj/berzelius-2023-154/users/x_qinzh/av2/3d_scene_flow
2929

30-
/proj/berzelius-2023-154/users/x_qinzh/mambaforge/envs/seflow/bin/python dataprocess/extract_av2.py \
30+
/proj/berzelius-2023-154/users/x_qinzh/mambaforge/envs/dataprocess/bin/python dataprocess/extract_av2.py --nproc 64 \
3131
--av2_type sensor \
3232
--data_mode test \
3333
--argo_dir /proj/berzelius-2023-154/users/x_qinzh/av2 \
34-
--output_dir /proj/berzelius-2023-154/users/x_qinzh/av2/preprocess_v2 \
34+
--output_dir /proj/berzelius-2023-364/users/x_qinzh/data/av2/preprocess_v2 \
3535
--mask_dir /proj/berzelius-2023-154/users/x_qinzh/av2/3d_scene_flow

assets/slurm/1_train.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,5 +33,5 @@ echo "Start training..."
3333
# ====> leaderboard model = seflow_best
3434
/proj/berzelius-2023-154/users/x_qinzh/mambaforge/envs/seflow/bin/python 1_train.py \
3535
slurm_id=$SLURM_JOB_ID wandb_mode=online train_data=/scratch/local/av2/sensor/train val_data=/scratch/local/av2/sensor/val \
36-
num_workers=16 model=deflow lr=2e-4 epochs=20 batch_size=16 "model.target.num_iters=2" "model.val_monitor=val/Dynamic/Mean" \
36+
num_workers=16 model=deflow lr=2e-4 epochs=9 batch_size=16 "model.target.num_iters=2" "model.val_monitor=val/Dynamic/Mean" \
3737
loss_fn=seflowLoss "add_seloss={chamfer_dis: 1.0, static_flow_loss: 1.0, dynamic_chamfer_dis: 1.0, cluster_based_pc0pc1: 1.0}"

conf/config.yaml

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,8 @@ slurm_id: 00000
88
wandb_mode: offline # [offline, disabled, online]
99
wandb_project_name: seflow
1010

11-
train_data: /home/kin/data/dataset/av2/preprocess/sensor/train
12-
val_data: /home/kin/data/dataset/av2/preprocess/sensor/val
11+
train_data: /home/kin/data/dataset/av2/demo/preprocess_v2/sensor/train
12+
val_data: /home/kin/data/dataset/av2/demo/preprocess_v2/sensor/val
1313

1414
output: ${model.name}-${slurm_id}
1515

@@ -33,9 +33,10 @@ add_seloss:
3333
# log settings
3434
seed: 42069
3535
log_every: 10 # steps epochs*dataset_size/batch_size
36-
val_every: 5 # epochs
36+
val_every: 3 # epochs
3737
save_top_model: 5 # top_k model will be saved.
3838

3939
# -----> Model and Task Parameters
4040
voxel_size: [0.2, 0.2, 6]
41-
point_cloud_range: [-51.2, -51.2, -3, 51.2, 51.2, 3]
41+
point_cloud_range: [-51.2, -51.2, -3, 51.2, 51.2, 3]
42+
num_frames: 2

conf/eval.yaml

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,10 @@
11

2-
dataset_path: /home/kin/data/av2/preprocess/sensor
2+
dataset_path: /home/kin/data/av2/preprocess_v2/sensor
33
checkpoint: /home/kin/model_zoo/deflow.ckpt
44
av2_mode: val # [val, test]
55
save_res: False # [True, False]
66

7-
leaderboard_version: 1 # [1, 2]
7+
leaderboard_version: 2 # [1, 2]
88
supervised_flag: True # [True, False], whether you use any label from the dataset
99

1010
# no need to change
@@ -14,7 +14,6 @@ gpus: 1
1414
seed: 42069
1515
eval_only: True
1616
wandb_mode: offline # [offline, disabled, online]
17-
1817
defaults:
1918
- hydra: default
2019
- model: deflow

conf/model/deflow.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,4 +7,4 @@ target:
77
voxel_size: ${voxel_size}
88
point_cloud_range: ${point_cloud_range}
99

10-
val_monitor: val/Three-way
10+
val_monitor: val/Dynamic/Mean

conf/model/fastflow3d.yaml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,5 +5,5 @@ target:
55
bottleneck_head: False
66
voxel_size: ${voxel_size}
77
point_cloud_range: ${point_cloud_range}
8-
9-
val_monitor: val/Three-way
8+
9+
val_monitor: val/Dynamic/Mean

0 commit comments

Comments
 (0)