"""Compute TF-IDF scores for a small in-memory collection of documents."""

import math
import re
from collections import Counter
from typing import Dict, Iterable, List, Mapping

# Define your collection of documents
documents = [
    "This is the first document.",
    "This document is the second document.",
    "And this is the third one.",
    "Is this the first document?",
]

# A token is a run of word characters, optionally joined by apostrophes
# (so "don't" stays one token). Punctuation is never part of a token, which
# keeps "document.", "document?" and "document" from being counted as
# three different terms.
_TOKEN_RE = re.compile(r"\w+(?:'\w+)*")


def _tokenize(document: str) -> List[str]:
    """Return the lower-cased word tokens of *document*, punctuation removed."""
    return _TOKEN_RE.findall(document.lower())


def calculate_tf(document: str) -> Dict[str, float]:
    """Return the term frequency of every word in *document*.

    The frequency of a word is its number of occurrences divided by the
    total number of tokens in the document. A document with no tokens
    yields an empty dict.
    """
    words = _tokenize(document)
    total_words = len(words)
    # An empty Counter makes the comprehension a no-op, so there is no
    # division by zero for an empty document.
    return {word: count / total_words for word, count in Counter(words).items()}


def calculate_df(documents: Iterable[str]) -> Counter:
    """Return, for every word, the number of documents that contain it."""
    df = Counter()
    for document in documents:
        # A set ensures each document contributes at most 1 per word.
        df.update(set(_tokenize(document)))
    return df


def calculate_idf(df: Mapping[str, int], num_documents: int) -> Dict[str, float]:
    """Return the inverse document frequency ``log(N / df)`` of every word.

    Args:
        df: Mapping of word to the number of documents containing it.
        num_documents: Total number of documents in the collection (N).
    """
    return {word: math.log(num_documents / count) for word, count in df.items()}


def calculate_tfidf(tf: Mapping[str, float], idf: Mapping[str, float]) -> Dict[str, float]:
    """Return the TF-IDF score of every word in *tf*.

    Words that are missing from *idf* are given a score of 0.
    """
    return {word: tf_value * idf.get(word, 0) for word, tf_value in tf.items()}


def main() -> None:
    """Print the TF-IDF scores of each document in ``documents``."""
    num_documents = len(documents)
    dfs = calculate_df(documents)
    idfs = calculate_idf(dfs, num_documents)

    for idx, document in enumerate(documents):
        tf = calculate_tf(document)
        tfidf = calculate_tfidf(tf, idfs)

        print(f"TF-IDF values for Document {idx + 1}:")
        for word, value in tfidf.items():
            print(f"{word}: {value:.4f}")
        print("-" * 30)


if __name__ == "__main__":
    main()