Skip to content

Plagiarism Checker

🔑 ID:

34617

👨‍💻

Python

🕒

17/03/2024
Free

Description:

This code runs a plagiarism check on any piece of text and reports the result as a percentage. Before running it, get your own Serper API key and add it to a .env file like this:

SERPER_API_KEY = "YOUR_API_KEY"

Code:

from SimplerLLM.tools.serp import search_with_serper_api
import re
from typing import List
import time 

def search_chunk(chunk) -> bool:
    """
    Check whether a text chunk can be found online as an exact phrase.

    The chunk is wrapped in double quotes and handed to
    search_with_serper_api; any non-empty result counts as a match.

    Parameters:
    - chunk: The text chunk to look up.

    Returns:
    - True if the search returned at least one result. False if it returned
      none, or if any exception was raised while searching (the error is
      printed and otherwise ignored).
    """
    try:
        # Quoting the chunk asks the search backend for the exact phrase.
        exact_phrase_query = f'"{chunk}"'
        hits = search_with_serper_api(exact_phrase_query)
        return len(hits) != 0
    except Exception as err:
        # Best effort: a failed lookup is reported and treated as "not found".
        print(f"An error occurred: {err}")
        return False

def chunk_text(text, chunk_by) -> List[str]:
    """
    Split the input text into sentence or paragraph chunks.

    Sentences are delimited by '.', '?' or '!'; the delimiter itself is
    dropped. Punctuation that is followed by a digit (as in "3.14" or ".5")
    never splits. Paragraphs are delimited by newlines. In both modes each
    chunk is stripped of surrounding whitespace and empty chunks are
    discarded.

    Parameters:
    - text: The input text to be chunked.
    - chunk_by: The granularity for chunking ('sentence' or 'paragraph').

    Returns:
    - A list of non-empty, stripped chunks in their original order.

    Raises:
    - ValueError: If chunk_by is neither 'sentence' nor 'paragraph'.
    """
    if chunk_by == "sentence":
        # A '.', '?' or '!' ends a sentence when either:
        #   1. it has no digit on either side (ordinary punctuation), or
        #   2. it follows a digit and is followed by whitespace or the end of
        #      the text, so a sentence that ends in a number
        #      ("... in 1990. She ...") is still split.
        # NOTE: rule 2 also treats a list marker such as "1. " as a sentence
        # end, because it cannot be told apart from a number-final sentence.
        sentence_end = r'(?<!\d)[.?!](?!\d)|(?<=\d)[.?!](?=\s|$)'
        pieces = re.split(sentence_end, text)
        return [piece.strip() for piece in pieces if piece.strip()]

    if chunk_by == "paragraph":
        # Every newline is a paragraph boundary; blank lines yield no chunk.
        return [line.strip() for line in text.split("\n") if line.strip()]

    raise ValueError("Invalid chunk_by value. Choose 'sentence' or 'paragraph'.")

def calculate_plagiarism_score(text, chunk_by) -> float:
    """
    Calculate the percentage of chunks in a text that are found online.

    The text is split with chunk_text and each distinct chunk is looked up
    once with search_chunk. The score is the share of chunks (duplicates
    included) whose lookup reported a match.

    Parameters:
    - text: The input text to check for plagiarism.
    - chunk_by: The granularity for chunking the text ('sentence' or 'paragraph').

    Returns:
    - The plagiarism score as a float between 0.0 and 100.0. A text that
      yields no chunks scores 0.0.

    Raises:
    - ValueError: Propagated from chunk_text when chunk_by is invalid.
    """
    chunks = chunk_text(text, chunk_by)
    if not chunks:
        # Nothing to check; also avoids dividing by zero below.
        return 0.0

    # Every lookup is a search request, so repeated chunks are searched once
    # and their verdict is reused.
    verdicts = {}
    for chunk in chunks:
        if chunk not in verdicts:
            verdicts[chunk] = search_chunk(chunk)

    plagiarised_chunks = sum(1 for chunk in chunks if verdicts[chunk])
    return (plagiarised_chunks / len(chunks)) * 100


#MAIN SECTION
# perf_counter() is a monotonic, high-resolution clock intended for measuring
# elapsed time; unlike time.time() it is not affected by system clock changes.
start_time = time.perf_counter()

text = """ INPUT_TEXT """  # The input text to check

chunk_by = "sentence"  # "sentence" or "paragraph"
plagiarism_score = calculate_plagiarism_score(text, chunk_by)

end_time = time.perf_counter()
runtime = end_time - start_time  # Elapsed seconds for the whole check

print(f"Plagiarism Score: {plagiarism_score}%")
print(f"Runtime: {runtime} seconds")

 

GitHub Link

✖️ Not Available

Download File

✖️ Not Available

If you’re encountering any problems or need further assistance with this code, we’re here to help! Join our community on the forum or Discord for support, tips, and discussion.