% Conference paper entry. The title is stored in Title Case; {VL-Match} is
% braced so styles that sentence-case titles keep its capitalisation, and
% {IEEE/CVF} is braced so the acronym survives any recasing of booktitle.
@inproceedings{bi2023vl,
  author    = {Bi, Junyu and Cheng, Daixuan and Yao, Ping and Pang, Bochen and Zhan, Yuefeng and Yang, Chuanguang and Wang, Yujing and Sun, Hao and Deng, Weiwei and Zhang, Qi},
  title     = {{VL-Match}: Enhancing Vision-Language Pretraining with Token-Level and Instance-Level Matching},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision},
  pages     = {2584--2593},
  year      = {2023},
}