﻿<?xml version="1.0" encoding="utf-8"?>
<ArticleSet>
  <ARTICLE>
    <Journal>
      <PublisherName>مرکز منطقه ای اطلاع رسانی علوم و فناوری</PublisherName>
      <JournalTitle>Journal of Information Systems and Telecommunication (JIST)</JournalTitle>
      <ISSN>2322-1437</ISSN>
      <Volume>13</Volume>
      <Issue>49</Issue>
      <PubDate PubStatus="epublish">
        <Year>2025</Year>
        <Month>5</Month>
        <Day>25</Day>
      </PubDate>
    </Journal>
    <ArticleTitle>A Turkish Dataset and BERTurk-Contrastive Model for Semantic Textual Similarity</ArticleTitle>
    <VernacularTitle>A Turkish Dataset and BERTurk-Contrastive Model for Semantic Textual Similarity</VernacularTitle>
    <FirstPage>24</FirstPage>
    <LastPage>32</LastPage>
    <ELocationID EIdType="doi">10.61186/jist.48127.13.49.24</ELocationID>
    <Language>en</Language>
    <AuthorList>
      <Author>
        <FirstName>Somaiyeh</FirstName>
        <LastName>Dehghan</LastName>
        <Affiliation></Affiliation>
      </Author>
      <Author>
        <FirstName>Mehmet Fatih</FirstName>
        <LastName>Amasyali</LastName>
        <Affiliation>Yildiz Technical University</Affiliation>
      </Author>
    </AuthorList>
    <History PubStatus="received">
      <Year>2024</Year>
      <Month>9</Month>
      <Day>27</Day>
    </History>
    <Abstract>&lt;p&gt;Semantic Textual Similarity (STS) is an important NLP task that measures the degree of semantic equivalence between two texts, even if the sentence pairs contain different words. While extensively studied in English, STS has received limited attention in Turkish. This study introduces BERTurk-contrastive, a novel BERT-based model leveraging contrastive learning to enhance the STS task in Turkish. Our model aims to learn representations by bringing similar sentences closer together in the embedding space while pushing dissimilar ones farther apart. To support this task, we release SICK-tr, a new STS dataset in Turkish, created by translating the English SICK dataset. We evaluate our model on STSb-tr and SICK-tr, achieving a significant improvement of 5.92 points over previous models. These results establish BERTurk-contrastive as a robust solution for STS in Turkish and provide a new benchmark for future research.&lt;/p&gt;</Abstract>
    <ObjectList>
      <Object Type="Keyword">
        <Param Name="Value">Semantic Textual Similarity</Param>
      </Object>
      <Object Type="Keyword">
        <Param Name="Value">Contrastive Learning</Param>
      </Object>
      <Object Type="Keyword">
        <Param Name="Value">Deep Learning</Param>
      </Object>
      <Object Type="Keyword">
        <Param Name="Value">BERT</Param>
      </Object>
      <Object Type="Keyword">
        <Param Name="Value">BERTurk</Param>
      </Object>
      <Object Type="Keyword">
        <Param Name="Value">Turkish Language</Param>
      </Object>
    </ObjectList>
    <ArchiveCopySource DocType="Pdf">http://jist.ir/en/Article/Download/48127</ArchiveCopySource>
  </ARTICLE>
</ArticleSet>