% Conference paper in the 2021 IEEE International Conference on Big Data proceedings.
% The last author is a corporate author; it is double-braced so BibTeX keeps it as a
% single indivisible name instead of splitting it into given name and surname.
% Acronyms in the title and booktitle are brace-protected against style recasing.
@inproceedings{a7adb71dcd4b41a0a09044920ea7bb7e,
  author    = {Pah, Adam R. and Rozolis, Christian J. and Schwartz, David L. and Alexander, Charlotte S. and {{SCALES OKN Consortium}}},
  title     = {{PRESIDE}: A Judge Entity Recognition and Disambiguation Model for {US} District Court Records},
  booktitle = {Proceedings - 2021 {IEEE} International Conference on Big Data, Big Data 2021},
  editor    = {Chen, Yixin and Ludwig, Heiko and Tu, Yicheng and Fayyad, Usama and Zhu, Xingquan and Hu, Xiaohua Tony and Byna, Suren and Liu, Xiong and Zhang, Jianping and Pan, Shirui and Papalexakis, Vagelis and Wang, Jianwu and Cuzzocrea, Alfredo and Ordonez, Carlos},
  publisher = {Institute of Electrical and Electronics Engineers Inc.},
  address   = {United States},
  year      = {2021},
  pages     = {2721--2728},
  doi       = {10.1109/BigData52589.2021.9671351},
  language  = {English (US)},
  keywords  = {court records, disambiguation, judicial entities, named entity recognition},
  abstract  = {The docket sheet of a court case contains a wealth of information about the progression of a case, the parties' and judge's decision-making along the way, and the case's ultimate outcome that can be used in analytical applications. However, the unstructured text of the docket sheet and the terse and variable phrasing of docket entries require the development of new models to identify key entities to enable analysis at a systematic level. We developed a judge entity recognition language model and disambiguation pipeline for US District Court records. Our model can robustly identify mentions of judicial entities in free text ($\sim$99\% F-1 Score) and outperforms general state-of-the-art language models by 13\%. Our disambiguation pipeline is able to robustly identify both appointed and non-appointed judicial actors and correctly infer the type of appointment ($\sim$99\% precision). Lastly, we show with a case study on in forma pauperis decision-making that there is substantial error ($\sim$30\%) attributing decision outcomes to judicial actors if the free text of the docket is not used to make the identification and attribution.},
  note      = {Funding Information: This work was supported by the National Science Foundation Convergence Accelerator Program under grant nos. 1937123 and 2033604. Publisher Copyright: {\textcopyright} 2021 IEEE.; 2021 IEEE International Conference on Big Data, Big Data 2021 ; Conference date: 15-12-2021 Through 18-12-2021},
}