% bi2023vl -- conference paper (ICCV 2023 proceedings).
% Braces around {VL-Match} and {IEEE/CVF} keep their capitalisation under
% styles that apply sentence casing to titles.
@inproceedings{bi2023vl,
  author    = {Bi, Junyu and Cheng, Daixuan and Yao, Ping and Pang, Bochen and Zhan, Yuefeng and Yang, Chuanguang and Wang, Yujing and Sun, Hao and Deng, Weiwei and Zhang, Qi},
  title     = {{VL-Match}: Enhancing vision-language pretraining with token-level and instance-level matching},
  booktitle = {Proceedings of the {IEEE/CVF} International Conference on Computer Vision},
  pages     = {2584--2593},
  year      = {2023},
}