Advanced usage: GraphRAG + Ollama + LM Studio + Chainlit

Download the model with Ollama

ollama run gemma2:9b

GraphRAG setup

pip install graphrag

python -m graphrag.index --init --root ./ragpdf

python -m graphrag.index --root ./ragpdf
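Between the two commands above (after --init, before indexing), the source documents need to be placed under the input/ folder of the root directory; the indexer picks up .txt files there by default. A minimal preparation sketch (assuming the book.txt produced in the PDF-conversion section below):

# prepare_input.py -- put the source text where the indexer expects it
from pathlib import Path
import shutil

input_dir = Path("./ragpdf/input")
input_dir.mkdir(parents=True, exist_ok=True)  # ensure the input folder exists
shutil.copy("book.txt", input_dir / "book.txt")  # book.txt is generated further below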

Local model configuration (settings.yaml)

encoding_model: cl100k_base
skip_workflows: []
llm:
  api_key: ollama
  type: openai_chat # or azure_openai_chat
  model: gemma2:latest
  model_supports_json: true # recommended if this is available for your model.
  api_base: http://localhost:11434/v1


embeddings:
  ## parallelization: override the global parallelization settings for embeddings
  async_mode: threaded # or asyncio
  llm:
    api_key: lm-studio
    type: openai_embedding # or azure_openai_embedding
    model: nomic-ai/nomic-embed-text-v1.5-GGUF/nomic-embed-text-v1.5.Q5_K_M.gguf
    api_base: http://localhost:1234/v1
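Before indexing, both endpoints are worth a quick smoke test, since a misnamed model tag is a common failure (ollama run gemma2:9b pulls the 9B build; make sure the model: value above matches a tag that ollama list actually shows). A sketch using the openai client, with the ports and model identifiers taken from the config above:

from openai import OpenAI

# Chat model served by Ollama (the client requires an API key, but Ollama ignores it)
chat = OpenAI(base_url="http://localhost:11434/v1", api_key="ollama")
reply = chat.chat.completions.create(
    model="gemma2:latest",
    messages=[{"role": "user", "content": "Reply with one word: ready?"}],
)
print(reply.choices[0].message.content)

# Embedding model served by LM Studio
emb = OpenAI(base_url="http://localhost:1234/v1", api_key="lm-studio")
vec = emb.embeddings.create(
    model="nomic-ai/nomic-embed-text-v1.5-GGUF/nomic-embed-text-v1.5.Q5_K_M.gguf",
    input="ready?",
)
print(len(vec.data[0].embedding))  # nomic-embed-text-v1.5 should report 768 dimensions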

Convert PDF to Markdown, then Markdown to TXT

# Test document: https://github.com/win4r/mytest/blob/main/book.pdf

pip install marker-pdf

marker_single ./book.pdf ./pdftxt --batch_multiplier 2 --max_pages 60 --langs English

# Markdown to TXT
python markdown_to_text.py book.md book.txt
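Since GraphRAG chunks input on tokens, it is worth estimating the size of the converted text up front (the generated settings.yaml defaults to 1,200-token chunks; treat that figure as an assumption to verify against your config). A quick sketch with tiktoken, which the query scripts below already depend on:

import tiktoken

enc = tiktoken.get_encoding("cl100k_base")
with open("book.txt", encoding="utf-8") as f:
    n_tokens = len(enc.encode(f.read()))
print(f"{n_tokens} tokens -> about {n_tokens // 1200 + 1} chunks at a 1200-token chunk size")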


Set the API key and model names

export GRAPHRAG_API_KEY="sk-xggd2443fg"
export GRAPHRAG_LLM_MODEL="gpt-3.5-turbo"
export GRAPHRAG_EMBEDDING_MODEL="text-embedding-ada-002"

global search

import os
import asyncio
import pandas as pd
import tiktoken

from graphrag.query.indexer_adapters import read_indexer_entities, read_indexer_reports
from graphrag.query.llm.oai.chat_openai import ChatOpenAI
from graphrag.query.llm.oai.typing import OpenaiApiType
from graphrag.query.structured_search.global_search.community_context import (
    GlobalCommunityContext,
)
from graphrag.query.structured_search.global_search.search import GlobalSearch

async def main():
    # Set up the language model (LLM)
    # Read the API key and model name from environment variables
    api_key = os.environ.get("GRAPHRAG_API_KEY")
    llm_model = os.environ.get("GRAPHRAG_LLM_MODEL")

    # Initialize the ChatOpenAI instance
    llm = ChatOpenAI(
        api_key=api_key,
        model=llm_model,
        api_type=OpenaiApiType.OpenAI,  # use the OpenAI-compatible API
        max_retries=20,  # maximum number of retries
    )

    # Initialize the token encoder
    token_encoder = tiktoken.get_encoding("cl100k_base")

    # Load community reports as context for global search
    INPUT_DIR = "./inputs/operation dulce"  # input directory
    COMMUNITY_REPORT_TABLE = "create_final_community_reports"  # community report table
    ENTITY_TABLE = "create_final_nodes"  # entity table
    ENTITY_EMBEDDING_TABLE = "create_final_entities"  # entity embedding table

    COMMUNITY_LEVEL = 2  # community level in the Leiden community hierarchy
    # Read the parquet files
    entity_df = pd.read_parquet(f"{INPUT_DIR}/{ENTITY_TABLE}.parquet")
    report_df = pd.read_parquet(f"{INPUT_DIR}/{COMMUNITY_REPORT_TABLE}.parquet")
    entity_embedding_df = pd.read_parquet(f"{INPUT_DIR}/{ENTITY_EMBEDDING_TABLE}.parquet")

    # Read indexer reports and entities
    reports = read_indexer_reports(report_df, entity_df, COMMUNITY_LEVEL)
    entities = read_indexer_entities(entity_df, entity_embedding_df, COMMUNITY_LEVEL)
    print(f"Total report count: {len(report_df)}")
    print(report_df.head())

    # Build the global context from community reports
    context_builder = GlobalCommunityContext(
        community_reports=reports,
        entities=entities,  # used to compute community weights for context ranking
        token_encoder=token_encoder,
    )

    # Run global search
    # Context builder parameters
    context_builder_params = {
        "use_community_summary": False,  # use full community reports rather than summaries
        "shuffle_data": True,  # shuffle the data
        "include_community_rank": True,  # include the community rank
        "min_community_rank": 0,  # minimum community rank
        "community_rank_name": "rank",  # name of the community rank column
        "include_community_weight": True,  # include the community weight
        "community_weight_name": "occurrence weight",  # name of the community weight column
        "normalize_community_weight": True,  # normalize community weights
        "max_tokens": 12_000,  # maximum number of tokens
        "context_name": "Reports",  # context name
    }

    # Parameters for the map-stage LLM
    map_llm_params = {
        "max_tokens": 1000,  # maximum number of generated tokens
        "temperature": 0.0,  # temperature, controls output randomness
        "response_format": {"type": "json_object"},  # respond as a JSON object
    }

    # Parameters for the reduce-stage LLM
    reduce_llm_params = {
        "max_tokens": 2000,  # maximum number of generated tokens
        "temperature": 0.0,  # temperature, controls output randomness
    }

    # Initialize the global search engine
    search_engine = GlobalSearch(
        llm=llm,
        context_builder=context_builder,
        token_encoder=token_encoder,
        max_data_tokens=12_000,  # maximum number of data tokens
        map_llm_params=map_llm_params,
        reduce_llm_params=reduce_llm_params,
        allow_general_knowledge=False,  # do not fall back on general knowledge
        json_mode=True,  # use JSON mode
        context_builder_params=context_builder_params,
        concurrent_coroutines=32,  # number of concurrent coroutines
        response_type="multiple paragraphs",  # respond with multiple paragraphs
    )

    # Run the search asynchronously
    result = await search_engine.asearch(
        "What is the major conflict in this story and who are the protagonist and antagonist?"
    )

    # Print the search results
    print(result.response)
    print("Context data reports:")
    print(result.context_data["reports"])
    print(f"LLM calls: {result.llm_calls}. Prompt tokens: {result.prompt_tokens}")

if __name__ == "__main__":
    asyncio.run(main())  # run the async main function
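Note that INPUT_DIR above points at GraphRAG's "operation dulce" sample index; for the ragpdf index built earlier it would be the artifacts folder of the indexing run (on this GraphRAG version, something like ./ragpdf/output/<timestamp>/artifacts, though treat the exact layout as an assumption). The script also targets the OpenAI API via environment variables, but it can be pointed at the local Ollama endpoint from the configuration section instead. A sketch of the ChatOpenAI construction (assumption: the query-side ChatOpenAI accepts an api_base argument, mirroring the OpenAIEmbedding call in the local-search script below):

llm = ChatOpenAI(
    api_key="ollama",  # any non-empty string; Ollama ignores it
    api_base="http://localhost:11434/v1",  # assumption: the same OpenAI-compatible endpoint as in settings.yaml
    model="gemma2:latest",
    api_type=OpenaiApiType.OpenAI,
    max_retries=20,
)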

local search

import os
import asyncio
import pandas as pd
import tiktoken

from graphrag.query.context_builder.entity_extraction import EntityVectorStoreKey
from graphrag.query.indexer_adapters import (
    read_indexer_covariates,
    read_indexer_entities,
    read_indexer_relationships,
    read_indexer_reports,
    read_indexer_text_units,
)
from graphrag.query.input.loaders.dfs import store_entity_semantic_embeddings
from graphrag.query.llm.oai.chat_openai import ChatOpenAI
from graphrag.query.llm.oai.embedding import OpenAIEmbedding
from graphrag.query.llm.oai.typing import OpenaiApiType
from graphrag.query.question_gen.local_gen import LocalQuestionGen
from graphrag.query.structured_search.local_search.mixed_context import LocalSearchMixedContext
from graphrag.query.structured_search.local_search.search import LocalSearch
from graphrag.vector_stores.lancedb import LanceDBVectorStore

async def main():
    # Input directory and table names
    INPUT_DIR = "./inputs/operation dulce"
    LANCEDB_URI = f"{INPUT_DIR}/lancedb"
    COMMUNITY_REPORT_TABLE = "create_final_community_reports"
    ENTITY_TABLE = "create_final_nodes"
    ENTITY_EMBEDDING_TABLE = "create_final_entities"
    RELATIONSHIP_TABLE = "create_final_relationships"
    COVARIATE_TABLE = "create_final_covariates"
    TEXT_UNIT_TABLE = "create_final_text_units"
    COMMUNITY_LEVEL = 2

    # Read entity data
    entity_df = pd.read_parquet(f"{INPUT_DIR}/{ENTITY_TABLE}.parquet")
    entity_embedding_df = pd.read_parquet(f"{INPUT_DIR}/{ENTITY_EMBEDDING_TABLE}.parquet")
    entities = read_indexer_entities(entity_df, entity_embedding_df, COMMUNITY_LEVEL)

    # Set up and load the entity description embeddings
    description_embedding_store = LanceDBVectorStore(collection_name="entity_description_embeddings")
    description_embedding_store.connect(db_uri=LANCEDB_URI)
    store_entity_semantic_embeddings(entities=entities, vectorstore=description_embedding_store)

    # Read relationship data
    relationship_df = pd.read_parquet(f"{INPUT_DIR}/{RELATIONSHIP_TABLE}.parquet")
    relationships = read_indexer_relationships(relationship_df)

    # Read covariate (claim) data
    covariate_df = pd.read_parquet(f"{INPUT_DIR}/{COVARIATE_TABLE}.parquet")
    claims = read_indexer_covariates(covariate_df)
    covariates = {"claims": claims}

    # Read community report data
    report_df = pd.read_parquet(f"{INPUT_DIR}/{COMMUNITY_REPORT_TABLE}.parquet")
    reports = read_indexer_reports(report_df, entity_df, COMMUNITY_LEVEL)

    # Read text unit data
    text_unit_df = pd.read_parquet(f"{INPUT_DIR}/{TEXT_UNIT_TABLE}.parquet")
    text_units = read_indexer_text_units(text_unit_df)

    # Set up the LLM and the embedding model
    api_key = os.environ["GRAPHRAG_API_KEY"]
    llm_model = os.environ["GRAPHRAG_LLM_MODEL"]
    embedding_model = os.environ["GRAPHRAG_EMBEDDING_MODEL"]

    llm = ChatOpenAI(
        api_key=api_key,
        model=llm_model,
        api_type=OpenaiApiType.OpenAI,
        max_retries=20,
    )

    token_encoder = tiktoken.get_encoding("cl100k_base")

    text_embedder = OpenAIEmbedding(
        api_key=api_key,
        api_base=None,
        api_type=OpenaiApiType.OpenAI,
        model=embedding_model,
        deployment_name=embedding_model,
        max_retries=20,
    )

    # Build the local search context builder
    context_builder = LocalSearchMixedContext(
        community_reports=reports,
        text_units=text_units,
        entities=entities,
        relationships=relationships,
        covariates=covariates,
        entity_text_embeddings=description_embedding_store,
        embedding_vectorstore_key=EntityVectorStoreKey.ID,
        text_embedder=text_embedder,
        token_encoder=token_encoder,
    )

    # Local search parameters
    local_context_params = {
        "text_unit_prop": 0.5,
        "community_prop": 0.1,
        "conversation_history_max_turns": 5,
        "conversation_history_user_turns_only": True,
        "top_k_mapped_entities": 10,
        "top_k_relationships": 10,
        "include_entity_rank": True,
        "include_relationship_weight": True,
        "include_community_rank": False,
        "return_candidate_context": False,
        "embedding_vectorstore_key": EntityVectorStoreKey.ID,
        "max_tokens": 12_000,
    }

    llm_params = {
        "max_tokens": 2_000,
        "temperature": 0.0,
    }

    # Create the local search engine
    search_engine = LocalSearch(
        llm=llm,
        context_builder=context_builder,
        token_encoder=token_encoder,
        llm_params=llm_params,
        context_builder_params=local_context_params,
        response_type="multiple paragraphs",
    )

    # Run local search examples
    result = await search_engine.asearch("Tell me about Agent Mercer")
    print(result.response)

    result = await search_engine.asearch("Tell me about Dr. Jordan Hayes")
    print(result.response)

    # Create the question generator
    question_generator = LocalQuestionGen(
        llm=llm,
        context_builder=context_builder,
        token_encoder=token_encoder,
        llm_params=llm_params,
        context_builder_params=local_context_params,
    )

    # Generate candidate questions
    question_history = [
        "Tell me about Agent Mercer",
        "What happens in Dulce military base?",
    ]
    candidate_questions = await question_generator.agenerate(
        question_history=question_history, context_data=None, question_count=5
    )
    print(candidate_questions.response)

if __name__ == "__main__":
    asyncio.run(main())
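One caveat for local search: create_final_covariates.parquet only exists if claim extraction was enabled during indexing, and it is off by default (in settings.yaml this is the claim_extraction section; treat the exact key name as an assumption against your GraphRAG version). A defensive variant of the covariate-loading step from the script above:

from pathlib import Path

covariate_path = Path(f"{INPUT_DIR}/{COVARIATE_TABLE}.parquet")
if covariate_path.exists():
    covariates = {"claims": read_indexer_covariates(pd.read_parquet(covariate_path))}
else:
    covariates = None  # assumption: LocalSearchMixedContext accepts covariates=None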

global-ui

import os
import asyncio
import pandas as pd
import tiktoken
import chainlit as cl

from graphrag.query.indexer_adapters import read_indexer_entities, read_indexer_reports
from graphrag.query.llm.oai.chat_openai import ChatOpenAI
from graphrag.query.llm.oai.typing import OpenaiApiType
from graphrag.query.structured_search.global_search.community_context import GlobalCommunityContext
from graphrag.query.structured_search.global_search.search import GlobalSearch

# Global state
search_engine = None


@cl.on_chat_start
async def on_chat_start():
    global search_engine

    # Read the API key and model name from environment variables
    api_key = os.environ.get("GRAPHRAG_API_KEY")
    if not api_key:
        await cl.Message(content="Error: GRAPHRAG_API_KEY is not set").send()
        raise ValueError("GRAPHRAG_API_KEY environment variable is not set")

    llm_model = os.environ.get("GRAPHRAG_LLM_MODEL", "gpt-3.5-turbo")

    # Initialize the ChatOpenAI instance
    llm = ChatOpenAI(
        api_key=api_key,
        model=llm_model,
        api_type=OpenaiApiType.OpenAI,
        max_retries=20,
    )

    # Initialize the token encoder
    token_encoder = tiktoken.get_encoding("cl100k_base")

    # Load community reports as context for global search
    INPUT_DIR = "./inputs/operation dulce"
    COMMUNITY_REPORT_TABLE = "create_final_community_reports"
    ENTITY_TABLE = "create_final_nodes"
    ENTITY_EMBEDDING_TABLE = "create_final_entities"
    COMMUNITY_LEVEL = 2

    try:
        # Read the parquet files
        entity_df = pd.read_parquet(f"{INPUT_DIR}/{ENTITY_TABLE}.parquet")
        report_df = pd.read_parquet(f"{INPUT_DIR}/{COMMUNITY_REPORT_TABLE}.parquet")
        entity_embedding_df = pd.read_parquet(f"{INPUT_DIR}/{ENTITY_EMBEDDING_TABLE}.parquet")
    except FileNotFoundError as e:
        await cl.Message(content=f"Error: required parquet file not found. {str(e)}").send()
        return

    # Read indexer reports and entities
    reports = read_indexer_reports(report_df, entity_df, COMMUNITY_LEVEL)
    entities = read_indexer_entities(entity_df, entity_embedding_df, COMMUNITY_LEVEL)
    await cl.Message(content=f"Total report count: {len(report_df)}").send()

    # Build the global context from community reports
    context_builder = GlobalCommunityContext(
        community_reports=reports,
        entities=entities,
        token_encoder=token_encoder,
    )

    # Context builder parameters
    context_builder_params = {
        "use_community_summary": False,
        "shuffle_data": True,
        "include_community_rank": True,
        "min_community_rank": 0,
        "community_rank_name": "rank",
        "include_community_weight": True,
        "community_weight_name": "occurrence weight",
        "normalize_community_weight": True,
        "max_tokens": 12_000,
        "context_name": "Reports",
    }

    # Parameters for the map-stage LLM
    map_llm_params = {
        "max_tokens": 1000,
        "temperature": 0.0,
        "response_format": {"type": "json_object"},
    }

    # Parameters for the reduce-stage LLM
    reduce_llm_params = {
        "max_tokens": 2000,
        "temperature": 0.0,
    }

    # Initialize the global search engine
    search_engine = GlobalSearch(
        llm=llm,
        context_builder=context_builder,
        token_encoder=token_encoder,
        max_data_tokens=12_000,
        map_llm_params=map_llm_params,
        reduce_llm_params=reduce_llm_params,
        allow_general_knowledge=False,
        json_mode=True,
        context_builder_params=context_builder_params,
        concurrent_coroutines=32,
        response_type="multiple paragraphs",
    )

    await cl.Message(content="全局搜索系统已准备就绪,请输入您的查询。").send()


@cl.on_message
async def main(message: cl.Message):
    global search_engine

    if search_engine is None:
        await cl.Message(content="The search engine is not initialized yet. Please try again shortly.").send()
        return

    query = message.content
    result = await search_engine.asearch(query)

    # Send the search result
    await cl.Message(content=result.response).send()

    # Send the context data report count
    context_data = f"Context data report count: {len(result.context_data['reports'])}"
    await cl.Message(content=context_data).send()

    # Send LLM call statistics
    llm_info = f"LLM calls: {result.llm_calls}. Prompt tokens: {result.prompt_tokens}"
    await cl.Message(content=llm_info).send()


# Chainlit apps are launched from the CLI rather than via a __main__ block,
# e.g.: chainlit run app.py  (substituting this file's name)

local-ui

import os
import asyncio
import pandas as pd
import tiktoken
import chainlit as cl

from graphrag.query.context_builder.entity_extraction import EntityVectorStoreKey
from graphrag.query.indexer_adapters import (
    read_indexer_covariates,
    read_indexer_entities,
    read_indexer_relationships,
    read_indexer_reports,
    read_indexer_text_units,
)
from graphrag.query.input.loaders.dfs import store_entity_semantic_embeddings
from graphrag.query.llm.oai.chat_openai import ChatOpenAI
from graphrag.query.llm.oai.embedding import OpenAIEmbedding
from graphrag.query.llm.oai.typing import OpenaiApiType
from graphrag.query.question_gen.local_gen import LocalQuestionGen
from graphrag.query.structured_search.local_search.mixed_context import LocalSearchMixedContext
from graphrag.query.structured_search.local_search.search import LocalSearch
from graphrag.vector_stores.lancedb import LanceDBVectorStore

# Global state
search_engine = None
question_generator = None
question_history = []


@cl.on_chat_start
async def on_chat_start():
    global search_engine, question_generator

    try:
        # Input directory and table names
        INPUT_DIR = "./inputs/operation dulce"
        LANCEDB_URI = f"{INPUT_DIR}/lancedb"
        COMMUNITY_REPORT_TABLE = "create_final_community_reports"
        ENTITY_TABLE = "create_final_nodes"
        ENTITY_EMBEDDING_TABLE = "create_final_entities"
        RELATIONSHIP_TABLE = "create_final_relationships"
        COVARIATE_TABLE = "create_final_covariates"
        TEXT_UNIT_TABLE = "create_final_text_units"
        COMMUNITY_LEVEL = 2

        # Read entity data
        entity_df = pd.read_parquet(f"{INPUT_DIR}/{ENTITY_TABLE}.parquet")
        entity_embedding_df = pd.read_parquet(f"{INPUT_DIR}/{ENTITY_EMBEDDING_TABLE}.parquet")
        entities = read_indexer_entities(entity_df, entity_embedding_df, COMMUNITY_LEVEL)

        # Set up and load the entity description embeddings
        description_embedding_store = LanceDBVectorStore(collection_name="entity_description_embeddings")
        description_embedding_store.connect(db_uri=LANCEDB_URI)
        store_entity_semantic_embeddings(entities=entities, vectorstore=description_embedding_store)

        # Read relationship data
        relationship_df = pd.read_parquet(f"{INPUT_DIR}/{RELATIONSHIP_TABLE}.parquet")
        relationships = read_indexer_relationships(relationship_df)

        # Read covariate (claim) data
        covariate_df = pd.read_parquet(f"{INPUT_DIR}/{COVARIATE_TABLE}.parquet")
        claims = read_indexer_covariates(covariate_df)
        covariates = {"claims": claims}

        # Read community report data
        report_df = pd.read_parquet(f"{INPUT_DIR}/{COMMUNITY_REPORT_TABLE}.parquet")
        reports = read_indexer_reports(report_df, entity_df, COMMUNITY_LEVEL)

        # Read text unit data
        text_unit_df = pd.read_parquet(f"{INPUT_DIR}/{TEXT_UNIT_TABLE}.parquet")
        text_units = read_indexer_text_units(text_unit_df)

        # Set up the LLM and the embedding model
        api_key = os.environ.get("GRAPHRAG_API_KEY")
        if not api_key:
            raise ValueError("GRAPHRAG_API_KEY environment variable is not set")
        llm_model = os.environ.get("GRAPHRAG_LLM_MODEL", "gpt-3.5-turbo")
        embedding_model = os.environ.get("GRAPHRAG_EMBEDDING_MODEL", "text-embedding-3-small")

        llm = ChatOpenAI(
            api_key=api_key,
            model=llm_model,
            api_type=OpenaiApiType.OpenAI,
            max_retries=20,
        )

        token_encoder = tiktoken.get_encoding("cl100k_base")

        text_embedder = OpenAIEmbedding(
            api_key=api_key,
            api_base=None,
            api_type=OpenaiApiType.OpenAI,
            model=embedding_model,
            deployment_name=embedding_model,
            max_retries=20,
        )

        # Build the local search context builder
        context_builder = LocalSearchMixedContext(
            community_reports=reports,
            text_units=text_units,
            entities=entities,
            relationships=relationships,
            covariates=covariates,
            entity_text_embeddings=description_embedding_store,
            embedding_vectorstore_key=EntityVectorStoreKey.ID,
            text_embedder=text_embedder,
            token_encoder=token_encoder,
        )

        # Local search parameters
        local_context_params = {
            "text_unit_prop": 0.5,
            "community_prop": 0.1,
            "conversation_history_max_turns": 5,
            "conversation_history_user_turns_only": True,
            "top_k_mapped_entities": 10,
            "top_k_relationships": 10,
            "include_entity_rank": True,
            "include_relationship_weight": True,
            "include_community_rank": False,
            "return_candidate_context": False,
            "embedding_vectorstore_key": EntityVectorStoreKey.ID,
            "max_tokens": 12_000,
        }

        llm_params = {
            "max_tokens": 2_000,
            "temperature": 0.0,
        }

        # Create the local search engine
        search_engine = LocalSearch(
            llm=llm,
            context_builder=context_builder,
            token_encoder=token_encoder,
            llm_params=llm_params,
            context_builder_params=local_context_params,
            response_type="multiple paragraphs",
        )

        # Create the question generator
        question_generator = LocalQuestionGen(
            llm=llm,
            context_builder=context_builder,
            token_encoder=token_encoder,
            llm_params=llm_params,
            context_builder_params=local_context_params,
        )

        await cl.Message(
            content="Local search and question generation are ready. Ask a question, or type '/generate' to generate new questions.").send()

    except Exception as e:
        await cl.Message(content=f"Error during initialization: {str(e)}").send()


@cl.on_message
async def main(message: cl.Message):
    global search_engine, question_generator, question_history

    if search_engine is None or question_generator is None:
        await cl.Message(content="The system is not fully initialized yet. Please try again shortly.").send()
        return

    try:
        if message.content.strip().lower() == "/generate":
            # Generate new questions
            await cl.Message(content="Generating questions, please wait...").send()
            candidate_questions = await question_generator.agenerate(
                question_history=question_history, context_data=None, question_count=5
            )
            if isinstance(candidate_questions.response, list):
                questions_text = "\n".join([f"{i + 1}. {q}" for i, q in enumerate(candidate_questions.response)])
            else:
                questions_text = candidate_questions.response
            await cl.Message(content=f"Here are some suggested questions:\n{questions_text}").send()
        else:
            # Run the search
            await cl.Message(content="Processing your question, please wait...").send()
            question_history.append(message.content)
            result = await search_engine.asearch(message.content)

            await cl.Message(content=result.response).send()

            context_data = f"Context data report count: {len(result.context_data['reports'])}"
            await cl.Message(content=context_data).send()

            llm_info = f"LLM calls: {result.llm_calls}. Prompt tokens: {result.prompt_tokens}"
            await cl.Message(content=llm_info).send()
    except Exception as e:
        error_message = f"An error occurred while processing your request: {str(e)}"
        await cl.Message(content=error_message).send()
        print(f"Error in main function: {str(e)}")  # print to the console for debugging


# Chainlit apps are launched from the CLI rather than via a __main__ block,
# e.g.: chainlit run app.py  (substituting this file's name)

markdown_to_text.py

## Usage: python markdown_to_text.py book.md book.txt  (requires: pip install markdown beautifulsoup4)

import markdown
from bs4 import BeautifulSoup
import re
import argparse


def markdown_to_text(markdown_content):
    # Convert Markdown to HTML
    html = markdown.markdown(markdown_content)

    # Use BeautifulSoup to parse HTML and extract text
    soup = BeautifulSoup(html, 'html.parser')
    text = soup.get_text(separator='\n\n')

    # Additional cleaning
    text = re.sub(r'\n{3,}', '\n\n', text)  # Replace multiple newlines with double newlines
    text = text.strip()  # Remove leading/trailing whitespace

    return text


def convert_file(input_file, output_file):
    # Read the Markdown file
    with open(input_file, 'r', encoding='utf-8') as f:
        markdown_content = f.read()

    # Convert to plain text
    plain_text = markdown_to_text(markdown_content)

    # Write the plain text to the output file
    with open(output_file, 'w', encoding='utf-8') as f:
        f.write(plain_text)


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Convert Markdown file to plain text")
    parser.add_argument("input_file", help="Path to the input Markdown file")
    parser.add_argument("output_file", help="Path to the output plain text file")
    args = parser.parse_args()

    convert_file(args.input_file, args.output_file)
    print(f"Conversion complete. Plain text saved to {args.output_file}")