% Record 48029: journal article, imported from a library-catalogue export.
% Cleaned for BibTeX/biblatex use:
%   - author: removed the trailing comma that made the name parse as "Last, Jr, <empty First>".
%   - title: Title Case, no hard-coded final period (the style adds punctuation).
%   - doi: bare identifier without the resolver prefix (the style adds it).
%   - the catalogue extent statement ("1 volume") is kept in the ignored field
%     `extent`; `pages` now carries the article number taken from the DOI suffix.
%   - journal/year are derived from the DOI (j.wpi, 2023).
% NOTE(review): volume and article number should be confirmed against the publisher record.
@article{48029,
      recid = {48029},
      author = {Lee, Jieh-Sheng},
      title = {Evaluating Generative Patent Language Models},
      journal = {World Patent Information},
      year = {2023},
      volume = {72},
      pages = {102173},
      extent = {1 volume},
      abstract = {Generative language models are promising for assisting  human writing in various domains. This manuscript aims to  build generative language models in the patent domain and  evaluate model performance from a human-centric  perspective. The perspective is to measure the ratio of  keystrokes that can be saved by autocompletion based on  generative patent language models. A higher ratio means a  more effective model which can save more keystrokes. This  metric can be used to benchmark model performance. The  metric is keystroke-based and different from conventional  machine-centric metrics that are token-based. In terms of  model size, the largest model built in this manuscript is  PatentGPT-J-6B, which is state-of-the-art in the patent  domain. Based on the metric, it is found that the largest  model is not necessarily the best for the human-centric  metric. The finding means that keeping increasing model  sizes in the patent domain might be unnecessary if the  purpose is to assist human writing with autocompletion.  Several patent language models are pre-trained from scratch  in this research. The pre-trained models are released for  future researchers. Several visualization tools are also  provided. The importance of building a generative language  model in the patent domain is its potential to facilitate  creativity and innovations in the future.},
      url = {http://tind.wipo.int/record/48029},
      doi = {10.1016/j.wpi.2023.102173},
}