@article{48574,
      recid = {48574},
      author = {Vowinckel, Konrad and Hähnke, Volker D.},
      title = {SEARCHFORMER: Semantic patent embeddings by siamese transformers for prior art search},
      journal = {World Patent Information},
      year = {2023},
      abstract = {The identification of relevant prior art for patent applications is of key importance for the work of patent examiners. Recent advancements in natural language processing, in the form of language models such as BERT, enable the creation of the next generation of prior art search tools. These models can generate vectorial representations of input text, enabling the use of vector similarity as a proxy for semantic text similarity. We fine-tuned a patent-specific BERT model for prior art search on a large set of real-world examples of patent claims, corresponding passages prejudicing novelty or inventive step, and random text fragments, creating the SEARCHFORMER. We show in retrospective ranking experiments that our model is a real improvement. For this purpose, we compiled an evaluation collection comprising 2014 pairs of patent applications and related potential prior art documents. We employed two representative baselines for comparison: (i) an optimized combination of automatically built queries and the BM25 ranking function, and (ii) several state-of-the-art language models, including SentenceTransformers optimized for semantic retrieval. Ranking performance was measured as the rank of the first relevant result. Using t-tests, we show that the ranking improvements achieved by the SEARCHFORMER over the baselines are statistically significant.},
      url = {http://tind.wipo.int/record/48574},
      doi = {10.1016/j.wpi.2023.102192}
}