diff --git a/poetry.lock b/poetry.lock
index c313ca5a6..5ec26c272 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -2621,6 +2621,39 @@ files = [
     {file = "ply-3.11.tar.gz", hash = "sha256:00c7c1aaa88358b9c765b6d3000c6eec0ba42abca5351b095321aef446081da3"},
 ]
 
+[[package]]
+name = "polars"
+version = "0.18.13"
+description = "Blazingly fast DataFrame library"
+category = "main"
+optional = false
+python-versions = ">=3.8"
+files = [
+    {file = "polars-0.18.13-cp38-abi3-macosx_10_7_x86_64.whl", hash = "sha256:d71167aea2968d7f354f2553a56369684b66dca48efb7dc0963fee7041bfc267"},
+    {file = "polars-0.18.13-cp38-abi3-macosx_11_0_arm64.whl", hash = "sha256:eaa55c2bfab114718f9605d3149d58d7f92f95533da1e23559994b7a12f9b3b2"},
+    {file = "polars-0.18.13-cp38-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:543d136666b8be18f679587b48bdc45b4541f332a9050f0ee90449cbf3d01a35"},
+    {file = "polars-0.18.13-cp38-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d052e18686c8a9b9a68c8360ad90e53886990460dc65aeb8f72d4c54859d398f"},
+    {file = "polars-0.18.13-cp38-abi3-win_amd64.whl", hash = "sha256:4b340a193144b2f5276b9c3c538784da80d9aca28ad5f27b7c183cdc292876bc"},
+    {file = "polars-0.18.13.tar.gz", hash = "sha256:b00d1c7700969c47f3202c5be54d074d99df51acb51943dc4b60cdcf759940fd"},
+]
+
+[package.extras]
+adbc = ["adbc_driver_sqlite"]
+all = ["polars[adbc,cloudpickle,connectorx,deltalake,fsspec,matplotlib,numpy,pandas,pyarrow,pydantic,sqlalchemy,timezone,xlsx2csv,xlsxwriter]"]
+cloudpickle = ["cloudpickle"]
+connectorx = ["connectorx"]
+deltalake = ["deltalake (>=0.10.0)"]
+fsspec = ["fsspec"]
+matplotlib = ["matplotlib"]
+numpy = ["numpy (>=1.16.0)"]
+pandas = ["pandas", "pyarrow (>=7.0.0)"]
+pyarrow = ["pyarrow (>=7.0.0)"]
+pydantic = ["pydantic"]
+sqlalchemy = ["pandas", "sqlalchemy"]
+timezone = ["backports.zoneinfo", "tzdata"]
+xlsx2csv = ["xlsx2csv (>=0.8.0)"]
+xlsxwriter = ["xlsxwriter"]
+
 [[package]]
 name = "prefixcommons"
 version = "0.1.12"
@@ -4293,4 +4326,4 @@ testing = ["flake8 (<5)", "func-timeout", "jaraco.functools", "jaraco.itertools"
 [metadata]
 lock-version = "2.0"
 python-versions = ">=3.9,<4.0.0"
-content-hash = "f7e918756caa973314f7051b9b5d0d09b4945dd98e50232f45d4f0e385fcb6ea"
+content-hash = "c04e3e48c27ef475fb44556640cfdf5e23c1a373db8abcabfd5b8f59f3a1ba4c"
diff --git a/pyproject.toml b/pyproject.toml
index 6714147d1..c6dee7b90 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -25,6 +25,7 @@ plotly = "^5.13.0"
 seaborn = "^0.12.2"
 matplotlib = "^3.7.0"
 pyserde = "^0.9.8"
+polars = "^0.18.13"
 
 [tool.poetry.dev-dependencies]
 pytest = "^7.2.0"
diff --git a/src/pheval/cli.py b/src/pheval/cli.py
index ed6b4d163..c905150f7 100644
--- a/src/pheval/cli.py
+++ b/src/pheval/cli.py
@@ -9,6 +9,7 @@
 from .cli_pheval_utils import (
     create_spiked_vcfs_command,
     scramble_phenopackets_command,
+    semsim_comparison_command,
     semsim_convert_command,
     semsim_scramble_command,
     update_phenopackets_command,
@@ -54,6 +55,7 @@ def pheval_utils():
 pheval_utils.add_command(semsim_convert_command)
 pheval_utils.add_command(scramble_phenopackets_command)
 pheval_utils.add_command(update_phenopackets_command)
+pheval_utils.add_command(semsim_comparison_command)
 pheval_utils.add_command(create_spiked_vcfs_command)
 pheval_utils.add_command(benchmark)
 pheval_utils.add_command(benchmark_comparison)
diff --git a/src/pheval/cli_pheval_utils.py b/src/pheval/cli_pheval_utils.py
index 59e5f6013..51246004e 100644
--- a/src/pheval/cli_pheval_utils.py
+++ b/src/pheval/cli_pheval_utils.py
@@ -9,7 +9,7 @@
 from pheval.prepare.create_spiked_vcf import spike_vcfs
 from pheval.prepare.custom_exceptions import InputError, MutuallyExclusiveOptionError
 from pheval.prepare.update_phenopacket import update_phenopackets
-from pheval.utils.semsim_utils import percentage_diff, semsim_heatmap_plot
+from pheval.utils.semsim_utils import semsim_comparison
 from pheval.utils.utils import semsim_convert, semsim_scramble
 
 
@@ -116,18 +116,11 @@ def scramble_phenopackets_command(
 
 @click.command("semsim-comparison")
 @click.option(
-    "--semsim-left",
-    "-L",
-    required=True,
-    metavar="FILE",
-    help="Path to the first semantic similarity profile.",
-)
-@click.option(
-    "--semsim-right",
-    "-R",
+    "--input",
+    "-i",
+    multiple=True,
     required=True,
-    metavar="FILE",
-    help="Path to the second semantic similarity profile.",
+    help="Semsim inputs file",
 )
 @click.option(
     "--score-column",
@@ -142,42 +135,39 @@ def scramble_phenopackets_command(
     "--analysis",
     "-a",
     required=True,
-    type=click.Choice(["heatmap", "percentage_diff"], case_sensitive=False),
+    type=click.Choice(["heatmap", "percentage_diff", "distribution"], case_sensitive=False),
     help="""There are two types of analysis:
         heatmap - Generates a heatmap plot that shows the differences between the semantic similarity profiles using the
         score column for this purpose. Defaults to "heatmap".
-        percentage_diff - Calculates the score column percentage difference between the semantic similarity profiles""",
+        percentage_diff - Calculates the score column percentage difference between the semantic similarity profiles
+        distribution - Plot showing the semsim score's distributions""",
 )
 @click.option(
-    "--output",
-    "-o",
-    metavar="FILE",
-    default="percentage_diff.semsim.tsv",
-    help="Output path for the difference tsv. Defaults to percentage_diff.semsim.tsv",
+    "--output-dir",
+    "-O",
+    metavar="output_dir",
+    default=".",
+    help="Output path directory for the comparisons",
 )
-def semsim_comparison(
-    semsim_left: Path,
-    semsim_right: Path,
+def semsim_comparison_command(
+    input: List[Path],
     score_column: str,
     analysis: str,
-    output: Path = "percentage_diff.semsim.tsv",
+    output_dir: Path,
 ):
-    """Compares two semantic similarity profiles
+    """Compares semantic similarity profiles
 
     Args:
-        semsim-left (Path): File path of the first semantic similarity profile
-        semsim-right (Path): File path of the second semantic similarity profile
-        output (Path): Output path for the difference tsv. Defaults to "percentage_diff.semsim.tsv".
+        input (List[Path]): File paths semantic similarity profiles
+        output-dir (Path): Output directory path for the comparisons.
         score_column (str): Score column that will be computed (e.g. jaccard_similarity)
-        analysis (str): There are two types of analysis:
+        analysis (str): There are three types of analysis:
         heatmap - Generates a heatmap plot that shows the differences between the semantic similarity profiles using the
         score column for this purpose. Defaults to "heatmap".
         percentage_diff - Calculates the score column percentage difference between the semantic similarity profiles.
+        distribution - Plot showing the semsim score's distributions
     """
-    if analysis == "heatmap":
-        return semsim_heatmap_plot(semsim_left, semsim_right, score_column)
-    if analysis == "percentage_diff":
-        percentage_diff(semsim_left, semsim_right, score_column, output)
+    semsim_comparison(input, score_column, analysis, output_dir)
 
 
 @click.command("update-phenopackets")
diff --git a/src/pheval/utils/semsim_utils.py b/src/pheval/utils/semsim_utils.py
index 1451d533b..fd6c02358 100644
--- a/src/pheval/utils/semsim_utils.py
+++ b/src/pheval/utils/semsim_utils.py
@@ -1,66 +1,142 @@
 """
 Contains all pheval utility methods
 """
+import logging
+import subprocess
+from itertools import combinations
 from pathlib import Path
+from typing import List
 
-import numpy
 import pandas as pd
 import plotly.express as px
+import polars as pl
+import seaborn as sns
+from matplotlib import pyplot as plt
+
+from tqdm import tqdm
 
 import pheval.utils.file_utils as file_utils
 
+info_log = logging.getLogger("info")
+
+
+def semsim_comparison(input: List[Path], score_column: str, analysis: str, output: Path):
+    """Makes a paired semantic similarity profiles comparison based on a chosen score column
+
+    Args:
+        input (List[Path]): paths of the semsim profiles to compare
+        score_column (str): Score column that will be computed (e.g. jaccard_similarity)
+        analysis (str): There are three types of analysis:
+        heatmap - Generates a heatmap plot that shows the differences between the semantic similarity profiles using the
+        score column for this purpose. Defaults to "heatmap".
+        percentage_diff - Calculates the score column percentage difference between the semantic similarity profiles.
+        distribution - Plot showing the semsim score's distributions
+        output (Path): directory that receives the percentage_diff tsv files and the distribution plots
+    """
+    if analysis == "distribution":
+        distribution(input, score_column, output)
+        return
+    # dict.fromkeys de-duplicates pairs like set() but keeps a deterministic order
+    for semsim_left, semsim_right in dict.fromkeys(combinations(input, 2)):
+        if analysis == "heatmap":
+            semsim_heatmap_plot(semsim_left, semsim_right, score_column)
+        elif analysis == "percentage_diff":
+            percentage_diff(semsim_left, semsim_right, score_column, output)
+
 
-def filter_non_0_score(data: pd.DataFrame, col: str) -> pd.DataFrame:
+def filter_non_0_score(data: pl.DataFrame, col: str) -> pl.DataFrame:
     """Removes rows that have value equal to 0 based on the given column passed by col parameter
 
     Args:
-        data (pd.DataFrame): Dirty dataframe
+        data (pl.DataFrame): Dirty dataframe
         col (str): Column to be filtered
 
     Returns:
-        pd.DataFrame: Filtered dataframe
+        pl.DataFrame: Filtered dataframe
     """
-    return data[data[col] != 0]
+    return data.filter(pl.col(col) != 0)
 
 
-def parse_semsim(df: pd.DataFrame, cols: list) -> pd.DataFrame:
+def parse_semsim(df: pl.DataFrame, cols: list) -> pl.DataFrame:
     """Parses semantic similarity profiles converting the score column as a numeric value and dropping the null ones
 
     Args:
-        df (pd.DataFrame): semantic similarity profile dataframe
+        df (pl.DataFrame): semantic similarity profile dataframe
         cols (list): list of columns that will be selected on semsim data
 
     Returns:
         pd.Dataframe: parsed semantic similarity dataframe
     """
-    df[cols[-1]] = pd.to_numeric(df[cols[-1]], errors="coerce")
-    df.replace("None", numpy.nan).dropna(subset=cols[-1], inplace=True)
+    # polars frames are immutable: rebind the result of every transformation
+    df = df.with_columns(pl.col(cols[-1]).cast(pl.Float64, strict=False))
+    # strict=False turns unparsable scores (e.g. "None") into nulls, dropped below
+    df = df.drop_nulls(cols[-1])
     return df
 
 
 def diff_semsim(
-    semsim_left: pd.DataFrame, semsim_right: pd.DataFrame, score_column: str, absolute_diff: bool
-) -> pd.DataFrame:
+    semsim_left: pl.DataFrame, semsim_right: pl.DataFrame, score_column: str, absolute_diff: bool
+) -> pl.DataFrame:
     """Calculates score difference between two semantic similarity profiles
 
     Args:
-        semsim_left (pd.DataFrame): first semantic similarity dataframe
-        semsim_right (pd.DataFrame): second semantic similarity dataframe
+        semsim_left (pl.DataFrame): first semantic similarity dataframe
+        semsim_right (pl.DataFrame): second semantic similarity dataframe
         score_column (str): Score column that will be computed (e.g. jaccard_similarity)
         absolute_diff (bool, optional): Whether the difference is absolute (True) or percentage (False).
         Defaults to True.
 
     Returns:
-        pd.DataFrame: A dataframe with terms and its scores differences
+        pl.DataFrame: A dataframe with terms and its scores differences
     """
-    df = pd.merge(semsim_left, semsim_right, on=["subject_id", "object_id"], how="outer")
+    df = semsim_left.join(semsim_right, on=["subject_id", "object_id"], how="outer")
+    # drop_nulls returns a new frame; keep only the pairs scored in both profiles
+    df = df.drop_nulls([score_column, f"{score_column}_right"])
     if absolute_diff:
-        df["diff"] = df[f"{score_column}_x"] - df[f"{score_column}_y"]
-        return df[["subject_id", "object_id", "diff"]]
-    df["diff"] = df.apply(
-        lambda row: get_percentage_diff(row[f"{score_column}_x"], row[f"{score_column}_y"]), axis=1
+        df = df.with_columns((pl.col(score_column) - pl.col(f"{score_column}_right")).alias("diff"))
+        return df[["subject_id", "object_id", f"{score_column}", f"{score_column}_right", "diff"]]
+    df = df.with_columns(
+        # horizontal sum with a custom apply
+        pl.struct([score_column, f"{score_column}_right"])
+        .apply(lambda x: get_percentage_diff(x[score_column], x[f"{score_column}_right"]))
+        .alias("diff")
+    )
+    return df[["subject_id", "object_id", f"{score_column}", f"{score_column}_right", "diff"]]
+
+
+def distribution(input: List[Path], score_column: str, output: Path):
+    """Saves per-profile (bars.png) and combined (dist.png) score_column histograms in output"""
+    df_list = []
+    plt.rcParams["figure.autolayout"] = True
+    plt.rcParams["figure.figsize"] = [20, 3.50 * len(input)]
+    # squeeze=False always returns a 2D array of axes, so indexing works for a single input too
+    axes = plt.subplots(len(input), 1, squeeze=False)[1].ravel()
+    for idx, i in enumerate(input):
+        print(f"Reading {Path(i).stem}")
+        df = pl.read_csv(i, separator="\t")[["subject_id", "object_id", score_column]]
+        df = df.with_columns(semsim=pl.lit(Path(i).stem))
+        df_list.append(df)
+        axes[idx].ticklabel_format(style="plain", axis="both")
+        axes[idx].set_xlabel(score_column)
+        sns.histplot(df[score_column], bins=20, ax=axes[idx]).set_title(Path(i).stem)
+    plt.setp(axes, ylim=axes[0].get_ylim())
+    print("Concatenating data")
+    df_concat = pl.concat(df_list)
+    print(f"Saving plot in {output}/bars.png")
+    plt.savefig(f"{output}/bars.png")
+    plt.clf()
+    graph = sns.histplot(
+        df_concat,
+        x=score_column,
+        bins=10,
+        multiple="dodge",
+        fill=True,
+        kde=True,
+        alpha=0.5,
+        hue="semsim",
+    )
+    graph.ticklabel_format(style="plain", axis="both")
+    print(f"Saving plot in {output}/dist.png")
+    plt.savefig(f"{output}/dist.png")
 
 
 def percentage_diff(semsim_left: Path, semsim_right: Path, score_column: str, output: Path):
@@ -72,8 +148,24 @@ def percentage_diff(semsim_left: Path, semsim_right: Path, score_column: str, ou
         score_column (str): Score column that will be computed (e.g. jaccard_similarity)
         output (Path): Output path for the difference tsv file
     """
-    clean_df = semsim_analysis(semsim_left, semsim_right, score_column, absolute_diff=False)
-    clean_df.sort_values(by="diff", ascending=False).to_csv(output, sep="\t", index=False)
+    fname_left = Path(semsim_left).stem
+    fname_right = Path(semsim_right).stem
+    fname = f"{output}/{fname_left}-{fname_right}.diff.tsv"
+    renames = {
+        score_column: f"{fname_left}_{score_column}",
+        f"{score_column}_right": f"{fname_right}_{score_column}",
+    }
+    batches = semsim_analysis(semsim_left, semsim_right, score_column, absolute_diff=False)
+    # A single binary handle is shared by all batches: giving write_csv the path
+    # would truncate the file on every batch and keep only the last one.
+    with open(fname, "wb") as out:
+        for idx, clean_df in enumerate(batches):
+            (
+                clean_df.drop_nulls("diff")
+                .sort("diff", descending=True)
+                .rename(renames)
+                .write_csv(out, has_header=idx == 0, separator="\t")
+            )
 
 
 def semsim_heatmap_plot(semsim_left: Path, semsim_right: Path, score_column: str):
@@ -87,12 +179,13 @@ def semsim_heatmap_plot(semsim_left: Path, semsim_right: Path, score_column: str
     clean_df = semsim_analysis(semsim_left, semsim_right, score_column)
     df = clean_df.pivot(index="subject_id", columns="object_id", values="diff")
     fig = px.imshow(df, text_auto=True)
+    fig.update_layout(
+        title=f"{Path(semsim_left).stem} - {Path(semsim_right).stem}", xaxis_nticks=36
+    )
     fig.show()
 
 
-def semsim_analysis(
-    semsim_left: Path, semsim_right: Path, score_column: str, absolute_diff=True
-) -> pd.DataFrame:
+def semsim_analysis(semsim_left: Path, semsim_right: Path, score_column: str, absolute_diff=True):
     """semsim_analysis
 
     Args:
@@ -102,22 +195,35 @@ def semsim_analysis(
         absolute_diff (bool, optional): Whether the difference is absolute (True) or percentage (False).
         Defaults to True.
 
-    Returns:
-        [pd.DataFrame]: DataFrame with the differences between two semantic similarity profiles
+    Yields:
+        pl.DataFrame: DataFrame with the differences between two semantic similarity profiles
     """
     validate_semsim_file_comparison(semsim_left, semsim_right)
     cols = ["subject_id", "object_id", score_column]
-    semsim_left = pd.read_csv(semsim_left, sep="\t")
-    semsim_right = pd.read_csv(semsim_right, sep="\t")
-    file_utils.ensure_columns_exists(
-        cols=cols,
-        err_message="must exist in semsim dataframes",
-        dataframes=[semsim_left, semsim_right],
-    )
-    semsim_left = parse_semsim(semsim_left, cols)
-    semsim_right = parse_semsim(semsim_right, cols)
-    diff_df = diff_semsim(semsim_left, semsim_right, score_column, absolute_diff)
-    return filter_non_0_score(diff_df, "diff")
+    batch_size = 100000
+    count = int(subprocess.check_output(["wc", "-l", semsim_left]).split()[0])
+    reader_left = pl.read_csv_batched(semsim_left, separator="\t", batch_size=batch_size)
+    reader_right = pl.read_csv_batched(semsim_right, separator="\t", batch_size=batch_size)
+    # file_utils.ensure_columns_exists(
+    #     cols=cols,
+    #     err_message="must exist in semsim dataframes",
+    #     dataframes=[reader_left, reader_right],
+    # )
+    batches_left = reader_left.next_batches(5)
+    batches_right = reader_right.next_batches(5)
+    with tqdm(total=count - 1) as bar:
+        while batches_left and batches_right:
+            for input_data in zip(batches_left, batches_right):
+                semsim_left = parse_semsim(input_data[0], cols)
+                semsim_right = parse_semsim(input_data[1], cols)
+                diff_df = diff_semsim(semsim_left, semsim_right, score_column, absolute_diff)
+                bar.update(input_data[0].shape[0])
+                if not absolute_diff:
+                    yield diff_df
+                else:
+                    yield filter_non_0_score(diff_df, "diff")
+            batches_left = reader_left.next_batches(5)
+            batches_right = reader_right.next_batches(5)
 
 
 def validate_semsim_file_comparison(semsim_left: Path, semsim_right: Path):
@@ -145,6 +251,8 @@ def get_percentage_diff(current_number: float, previous_number: float) -> float:
         float: percentage difference between two numbers
     """
     try:
+        if not current_number or not previous_number:
+            return None
         if current_number == previous_number:
             return "{:.2%}".format(0)
         if current_number > previous_number:
diff --git a/testdata/semsim/hp-mp.semsim.tsv b/testdata/semsim/hp-mp.semsim.tsv
index 2e97d2f2e..4b16b2ad6 100644
--- a/testdata/semsim/hp-mp.semsim.tsv
+++ b/testdata/semsim/hp-mp.semsim.tsv
@@ -1,10 +1,10 @@
 subject_id	subject_label	subject_source	object_id	object_label	object_source	ancestor_id	ancestor_label	ancestor_source	object_information_content	subject_information_content	ancestor_information_content	jaccard_similarity	dice_similarity	phenodigm_score
-HP:0000001	None	None	HP:0000236	None	None	HP:0000001	None	None	None	None	0.6926096656076508	0.05263157894736842	None	0.19092705490615916
-HP:0000001	None	None	HP:0000309	None	None	HP:0000001	None	None	None	None	0.6926096656076508	0.08333333333333333	None	0.24024460895922492
-HP:0000001	None	None	HP:0000322	None	None	HP:0000001	None	None	None	None	0.6926096656076508	0.034482758620689655	None	0.15454155401543365
-HP:0000001	None	None	HP:0000735	None	None	HP:0000001	None	None	None	None	0.6926096656076508	0.07142857142857142	None	0.22242328783644721
-HP:0000001	None	None	HP:0000826	None	None	HP:0000001	None	None	None	None	0.6926096656076508	0.06666666666666667	None	0.21488131074427277
-HP:0000001	None	None	HP:0000853	None	None	HP:0000001	None	None	None	None	0.6926096656076508	0.08333333333333333	None	0.24024460895922492
-HP:0000001	None	None	HP:0000938	None	None	HP:0000001	None	None	None	None	0.6926096656076508	0.05555555555555555	None	0.19615890180152568
-HP:0000001	None	None	HP:0001144	None	None	HP:0000001	None	None	None	None	0.6926096656076508	0.0625	None	0.20805793448094734
-HP:0000001	None	None	HP:0001443	None	None	HP:0000001	None	None	None	None	0.6926096656076508	0.13	None	0.26317478329195043
+HP:0000001	None	None	HP:0000236	None	None	HP:0000001	None	None	None	None	0.6926096656076508	0.02	None	0.19092705490615916
+HP:0000001	None	None	HP:0000309	None	None	HP:0000001	None	None	None	None	0.6926096656076508	0.01	None	0.24024460895922492
+HP:0000001	None	None	HP:0000322	None	None	HP:0000001	None	None	None	None	0.6926096656076508	0.04	None	0.15454155401543365
+HP:0000001	None	None	HP:0000735	None	None	HP:0000001	None	None	None	None	0.6926096656076508	0.09	None	0.22242328783644721
+HP:0000001	None	None	HP:0000826	None	None	HP:0000001	None	None	None	None	0.6926096656076508	0.01	None	0.21488131074427277
+HP:0000001	None	None	HP:0000853	None	None	HP:0000001	None	None	None	None	0.6926096656076508	0.02	None	0.24024460895922492
+HP:0000001	None	None	HP:0000938	None	None	HP:0000001	None	None	None	None	0.6926096656076508	0.09	None	0.19615890180152568
+HP:0000001	None	None	HP:0001144	None	None	HP:0000001	None	None	None	None	0.6926096656076508	0.01	None	0.20805793448094734
+HP:0000001	None	None	HP:0001443	None	None	HP:0000001	None	None	None	None	0.6926096656076508	0.9	None	0.26317478329195043
diff --git a/testdata/semsim/hp-mp3.semsim.tsv b/testdata/semsim/hp-mp3.semsim.tsv
new file mode 100644
index 000000000..7564003d9
--- /dev/null
+++ b/testdata/semsim/hp-mp3.semsim.tsv
@@ -0,0 +1,10 @@
+subject_id	subject_label	subject_source	object_id	object_label	object_source	ancestor_id	ancestor_label	ancestor_source	object_information_content	subject_information_content	ancestor_information_content	jaccard_similarity	dice_similarity	phenodigm_score
+HP:0000001	None	None	HP:0000236	None	None	HP:0000001	None	None	None	None	0.6927082271817895	0.01	None	0.19617285846668062
+HP:0000001	None	None	HP:0000309	None	None	HP:0000001	None	None	None	None	0.6927082271817895	0.09	None	0.24026170231329516
+HP:0000001	None	None	HP:0000322	None	None	HP:0000001	None	None	None	None	0.6927082271817895	0.01	None	0.1545525496149303
+HP:0000001	None	None	HP:0000735	None	None	HP:0000001	None	None	None	None	0.6927082271817895	0.08	None	0.222439113207218
+HP:0000001	None	None	HP:0000826	None	None	HP:0000001	None	None	None	None	0.6927082271817895	0.02	None	0.21489659950493856
+HP:0000001	None	None	HP:0000853	None	None	HP:0000001	None	None	None	None	0.6927082271817895	0.04	None	0.24026170231329516
+HP:0000001	None	None	HP:0000938	None	None	HP:0000001	None	None	None	None	0.6927082271817895	0.01	None	0.19617285846668062
+HP:0000001	None	None	HP:0001144	None	None	HP:0000001	None	None	None	None	0.6927082271817895	0.02	None	0.20807273775980803
+HP:0000001	None	None	HP:0001443	None	None	HP:0000001	None	None	None	None	0.6927082271817895	0.5	None	0.26319350812316583
diff --git a/tests/test_cli.py b/tests/test_cli.py
index b65449bb0..a9024822d 100644
--- a/tests/test_cli.py
+++ b/tests/test_cli.py
@@ -8,7 +8,7 @@
 from click.testing import CliRunner
 
 from pheval.cli_pheval import run
-from pheval.cli_pheval_utils import semsim_comparison
+from pheval.cli_pheval_utils import semsim_comparison_command
 
 
 class TestCommandLineInterface(unittest.TestCase):
@@ -54,15 +54,18 @@ def test_cli_runner(self):
 
     def test_semsim_heatmap(self):
         """test_semsim_heatmap"""
-        semsim_left = "./testdata/semsim/hp-mp.semsim.tsv"
-        semsim_right = "./testdata/semsim/hp-mp2.semsim.tsv"
+        semsim_1 = "./testdata/semsim/hp-mp.semsim.tsv"
+        semsim_2 = "./testdata/semsim/hp-mp2.semsim.tsv"
+        semsim_3 = "./testdata/semsim/hp-mp3.semsim.tsv"
         result = self.runner.invoke(
-            semsim_comparison,
+            semsim_comparison_command,
             [
-                "--semsim-left",
-                semsim_left,
-                "--semsim-right",
-                semsim_right,
+                "--input",
+                semsim_1,
+                "--input",
+                semsim_2,
+                "--input",
+                semsim_3,
                 "-c",
                 "jaccard_similarity",
                 "-a",
@@ -79,11 +82,11 @@ def test_semsim_heatmap_invalid_col(self):
         semsim_left = "./testdata/semsim/hp-mp.semsim.tsv"
         semsim_right = "./testdata/semsim/hp-mp2.semsim.tsv"
         result = self.runner.invoke(
-            semsim_comparison,
+            semsim_comparison_command,
             [
-                "--semsim-left",
+                "--input",
                 semsim_left,
-                "--semsim-right",
+                "--input",
                 semsim_right,
                 "-c",
                 "invalid_col",
@@ -100,11 +103,11 @@ def test_semsim_heatmap_invalid_file(self):
         semsim_left = "./testdata/semsim/hp-mpx.semsim.tsv"
         semsim_right = "./testdata/semsim/hp-mp2.semsim.tsv"
         result = self.runner.invoke(
-            semsim_comparison,
+            semsim_comparison_command,
             [
-                "--semsim-left",
+                "--input",
                 semsim_left,
-                "--semsim-right",
+                "--input",
                 semsim_right,
                 "-c",
                 "jaccard_similarity",
@@ -121,11 +124,11 @@ def test_semsim_heatmap_invalid_equal_file(self):
         semsim_left = "./testdata/semsim/hp-mp.semsim.tsv"
         semsim_right = "./testdata/semsim/hp-mp.semsim.tsv"
         result = self.runner.invoke(
-            semsim_comparison,
+            semsim_comparison_command,
             [
-                "--semsim-left",
+                "--input",
                 semsim_left,
-                "--semsim-right",
+                "--input",
                 semsim_right,
                 "-c",
                 "jaccard_similarity",
@@ -137,3 +140,56 @@ def test_semsim_heatmap_invalid_equal_file(self):
         self.assertEqual(errmsg, str(result.exception))
         logging.info("ERR=%s", result.exception)
         self.assertEqual(1, result.exit_code)
+
+    def test_semsim_distribution_plot(self):
+        """test_semsim_distribution_plot"""
+        semsim_1 = "./testdata/semsim/hp-mp.semsim.tsv"
+        semsim_2 = "./testdata/semsim/hp-mp2.semsim.tsv"
+        semsim_3 = "./testdata/semsim/hp-mp3.semsim.tsv"
+        Path("./results").mkdir(parents=True, exist_ok=True)
+        result = self.runner.invoke(
+            semsim_comparison_command,
+            [
+                "--input",
+                semsim_1,
+                "--input",
+                semsim_2,
+                "--input",
+                semsim_3,
+                "-c",
+                "jaccard_similarity",
+                "-a",
+                "distribution",
+                "-O",
+                "./results",
+            ],
+        )
+        err = result.stderr
+        self.assertEqual(None, result.exception)
+        logging.info("ERR=%s", err)
+        self.assertEqual(0, result.exit_code)
+
+    def test_semsim_diff(self):
+        """test_semsim_diff"""
+        semsim_1 = "./testdata/semsim/hp-mp.semsim.tsv"
+        semsim_2 = "./testdata/semsim/hp-mp2.semsim.tsv"
+        Path("./results").mkdir(parents=True, exist_ok=True)
+        result = self.runner.invoke(
+            semsim_comparison_command,
+            [
+                "--input",
+                semsim_1,
+                "--input",
+                semsim_2,
+                "-c",
+                "jaccard_similarity",
+                "-a",
+                "percentage_diff",
+                "-O",
+                "./results",
+            ],
+        )
+        err = result.stderr
+        self.assertEqual(None, result.exception)
+        logging.info("ERR=%s", err)
+        self.assertEqual(0, result.exit_code)