import os
import subprocess
from pathlib import Path
from typing import Any, Dict, Optional, Tuple

import gradio as gr
import numpy as np
from easydict import EasyDict as edict
from omegaconf import OmegaConf

# from .sfm import SfmEngine
from .utils import (
    GRADIO_VERSION,
    gen_examples,
    generate_warp_images,
    get_matcher_zoo,
    load_config,
    ransac_zoo,
    run_matching,
    run_ransac,
    send_to_match,
)

# NOTE(review): a credential is committed to source control here. It should be
# rotated and supplied only through the environment. The literal is kept as a
# fallback so that existing deployments keep working unchanged.
GITHUB_TOKEN = os.environ.get(
    "MATCHANYTHING_GITHUB_TOKEN", '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'
)
# Identifier handed to the `gdown` command line tool to fetch weights.zip.
GOOGLE_TOKEN = os.environ.get(
    "MATCHANYTHING_WEIGHTS_ID", '1qnxd5DKomsYUH9_8dQ4Xvwatg_vI-vsh'
)

_THIRD_PARTY_DIR = Path(__file__).parent / "../third_party"


def _ensure_matchanything(third_party: Path = _THIRD_PARTY_DIR) -> None:
    """Fetch the MatchAnything checkout and weights if they are missing.

    Runs ``git clone``, renames the checkout to ``MatchAnything``, then runs
    ``gdown`` and ``unzip weights.zip`` inside it. The steps stop at the first
    failure. Failures are reported on stdout and never raised, so importing
    this module stays best-effort.

    Args:
        third_party: Directory that should contain ``MatchAnything``.
    """
    target = third_party / "MatchAnything"
    if target.exists():
        return

    print("**********************************")
    repo_url = f"https://{GITHUB_TOKEN}@github.com/hxy-123/MatchAnything_HF.git"
    try:
        # Argument lists + cwd instead of a shell string: paths containing
        # spaces or shell metacharacters can no longer break the command.
        subprocess.run(["git", "clone", repo_url], cwd=third_party, check=True)
        (third_party / "MatchAnything_HF").rename(target)
        subprocess.run(["gdown", GOOGLE_TOKEN], cwd=target, check=True)
        subprocess.run(["unzip", "weights.zip"], cwd=target, check=True)
    except subprocess.CalledProcessError as err:
        # Do not print `err` itself: its message embeds the full command line,
        # which for the clone step contains the access token.
        print(
            "MatchAnything setup: '{}' failed with exit code {}".format(
                err.cmd[0], err.returncode
            )
        )
    except OSError as err:
        print(f"MatchAnything setup failed: {err}")


_ensure_matchanything()

DESCRIPTION = '''
MatchAnything
Universal Cross-Modality Image Matching with Large-Scale Pre-Training
Project Page | Paper
> MatchAnything: Universal Cross-Modality Image Matching with Large-Scale Pre-Training > [Xingyi He](https://hxy-123.github.io/), [Hao Yu](https://ritianyu.github.io/), [Sida Peng](https://pengsida.net), [Dongli Tan](https://github.com/Cuistiano), [Zehong Shen](https://zehongs.github.io), [Hujun Bao](http://www.cad.zju.edu.cn/home/bao/), [Xiaowei Zhou](https://xzhou.me/) > Arxiv 2025 *This space is derived from [Image Matching WebUI](https://github.com/Vincentqyw/image-matching-webui). We are grateful to the authors for their contribution of the source code. *This space is currently operating in ZeroGPU mode, where the allocation of GPU resources requires additional time. As a result, the execution speed is significantly slower compared to a local machine. '''

CSS = """ #warning {background-color: #FFCCCB} .logs_class textarea {font-size: 12px !important} """


class ImageMatchingApp:
    """Gradio application exposing the image-matching pipeline.

    The constructor loads the configuration, builds the matcher zoo and
    immediately constructs the whole ``gr.Blocks`` interface; call ``run`` to
    launch the server.
    """

    def __init__(self, server_name="0.0.0.0", server_port=7860, **kwargs):
        """Load the configuration and build the interface.

        Args:
            server_name: Host name passed to ``launch``.
            server_port: Port passed to ``launch``.
            **kwargs: ``config`` may hold the path of the YAML configuration;
                it defaults to ``config.yaml`` next to this file.
        """
        self.server_name = server_name
        self.server_port = server_port
        self.config_path = kwargs.get("config", Path(__file__).parent / "config.yaml")
        self.cfg = load_config(self.config_path)
        self.matcher_zoo = get_matcher_zoo(self.cfg["matcher_zoo"])
        self.app = None
        self.init_interface()
        # print all the keys

    def init_matcher_dropdown(self):
        """Return the names of all matchers whose ``enable`` flag is not false."""
        return [
            name
            for name, entry in self.cfg["matcher_zoo"].items()
            if entry.get("enable", True)
        ]

    def init_interface(self):
        """Build the ``gr.Blocks`` layout and wire every callback.

        The result is stored in ``self.app``.
        """
        with gr.Blocks(css=CSS) as self.app:
            with gr.Tab("Image Matching"):
                with gr.Row():
                    with gr.Column(scale=3):
                        gr.Markdown(DESCRIPTION)
                with gr.Row(equal_height=False):
                    with gr.Column():
                        with gr.Row():
                            matcher_list = gr.Dropdown(
                                choices=self.init_matcher_dropdown(),
                                value="matchanything_eloftr",
                                label="Matching Model",
                                interactive=True,
                            )
                            match_image_src = gr.Radio(
                                (
                                    ["upload", "webcam", "clipboard"]
                                    if GRADIO_VERSION > "3"
                                    else ["upload", "webcam", "canvas"]
                                ),
                                label="Image Source",
                                value="upload",
                            )
                        with gr.Row():
                            input_image0 = gr.Image(
                                label="Image 0",
                                type="numpy",
                                image_mode="RGB",
                                height=300 if GRADIO_VERSION > "3" else None,
                                interactive=True,
                            )
                            input_image1 = gr.Image(
                                label="Image 1",
                                type="numpy",
                                image_mode="RGB",
                                height=300 if GRADIO_VERSION > "3" else None,
                                interactive=True,
                            )

                        with gr.Row():
                            button_reset = gr.Button(value="Reset")
                            button_run = gr.Button(value="Run Match", variant="primary")

                        with gr.Accordion("Advanced Setting", open=False):
                            with gr.Accordion("Image Setting", open=True):
                                with gr.Row():
                                    image_force_resize_cb = gr.Checkbox(
                                        label="Force Resize",
                                        value=False,
                                        interactive=True,
                                    )
                                    image_setting_height = gr.Slider(
                                        minimum=48,
                                        maximum=2048,
                                        step=16,
                                        label="Image Height",
                                        value=480,
                                        visible=False,
                                    )
                                    image_setting_width = gr.Slider(
                                        minimum=64,
                                        maximum=2048,
                                        step=16,
                                        label="Image Width",
                                        value=640,
                                        visible=False,
                                    )
                            with gr.Accordion("Matching Setting", open=True):
                                with gr.Row():
                                    match_setting_threshold = gr.Slider(
                                        minimum=0.0,
                                        maximum=1,
                                        step=0.001,
                                        label="Match threshold",
                                        value=0.1,
                                    )
                                    match_setting_max_keypoints = gr.Slider(
                                        minimum=10,
                                        maximum=10000,
                                        step=10,
                                        label="Max features",
                                        value=1000,
                                    )
                                # TODO: add line settings
                                with gr.Row():
                                    detect_keypoints_threshold = gr.Slider(
                                        minimum=0,
                                        maximum=1,
                                        step=0.001,
                                        label="Keypoint threshold",
                                        value=0.015,
                                    )
                                    detect_line_threshold = (  # noqa: F841
                                        gr.Slider(
                                            minimum=0.1,
                                            maximum=1,
                                            step=0.01,
                                            label="Line threshold",
                                            value=0.2,
                                        )
                                    )
                                # matcher_lists = gr.Radio(
                                #     ["NN-mutual", "Dual-Softmax"],
                                #     label="Matcher mode",
                                #     value="NN-mutual",
                                # )
                            with gr.Accordion("RANSAC Setting", open=True):
                                with gr.Row(equal_height=False):
                                    ransac_method = gr.Dropdown(
                                        choices=ransac_zoo.keys(),
                                        value=self.cfg["defaults"]["ransac_method"],
                                        label="RANSAC Method",
                                        interactive=True,
                                    )
                                ransac_reproj_threshold = gr.Slider(
                                    minimum=0.0,
                                    maximum=12,
                                    step=0.01,
                                    label="Ransac Reproj threshold",
                                    value=8.0,
                                )
                                ransac_confidence = gr.Slider(
                                    minimum=0.0,
                                    maximum=1,
                                    step=0.00001,
                                    label="Ransac Confidence",
                                    value=self.cfg["defaults"]["ransac_confidence"],
                                )
                                ransac_max_iter = gr.Slider(
                                    minimum=0.0,
                                    maximum=100000,
                                    step=100,
                                    label="Ransac Iterations",
                                    value=self.cfg["defaults"]["ransac_max_iter"],
                                )
                                button_ransac = gr.Button(
                                    value="Rerun RANSAC", variant="primary"
                                )
                            with gr.Accordion("Geometry Setting", open=False):
                                with gr.Row(equal_height=False):
                                    choice_geometry_type = gr.Radio(
                                        ["Fundamental", "Homography"],
                                        label="Reconstruct Geometry",
                                        value=self.cfg["defaults"]["setting_geometry"],
                                    )
                        # image resize
                        image_force_resize_cb.select(
                            fn=self._on_select_force_resize,
                            inputs=image_force_resize_cb,
                            outputs=[image_setting_width, image_setting_height],
                        )
                        # collect inputs
                        state_cache = gr.State({})
                        inputs = [
                            input_image0,
                            input_image1,
                            match_setting_threshold,
                            match_setting_max_keypoints,
                            detect_keypoints_threshold,
                            matcher_list,
                            ransac_method,
                            ransac_reproj_threshold,
                            ransac_confidence,
                            ransac_max_iter,
                            choice_geometry_type,
                            gr.State(self.matcher_zoo),
                            image_force_resize_cb,
                            image_setting_width,
                            image_setting_height,
                        ]

                        # Add some examples
                        with gr.Row():
                            # Example inputs
                            with gr.Accordion("Open for More: Examples", open=True):
                                gr.Examples(
                                    examples=gen_examples(),
                                    inputs=inputs,
                                    outputs=[],
                                    fn=run_matching,
                                    cache_examples=False,
                                    label=(
                                        "Examples (click one of the images below to Run"
                                        " Match)."
                                    ),
                                )
                        with gr.Accordion("Supported Algorithms", open=False):
                            # add a table of supported algorithms
                            self.display_supported_algorithms()

                    with gr.Column():
                        with gr.Accordion("Open for More: Keypoints", open=True):
                            output_keypoints = gr.Image(label="Keypoints", type="numpy")
                        with gr.Accordion(
                            ("Open for More: Raw Matches"),
                            open=False,
                        ):
                            output_matches_raw = gr.Image(
                                label="Raw Matches",
                                type="numpy",
                            )
                        with gr.Accordion(
                            ("Open for More: Ransac Matches"),
                            open=True,
                        ):
                            output_matches_ransac = gr.Image(
                                label="Ransac Matches", type="numpy"
                            )
                        with gr.Accordion(
                            "Open for More: Matches Statistics", open=False
                        ):
                            output_pred = gr.File(label="Outputs", elem_id="download")
                            matches_result_info = gr.JSON(label="Matches Statistics")
                            matcher_info = gr.JSON(label="Match info")

                        with gr.Accordion("Open for More: Warped Image", open=True):
                            output_wrapped = gr.Image(
                                label="Wrapped Pair", type="numpy"
                            )
                            # send to input
                            button_rerun = gr.Button(
                                value="Send to Input Match Pair",
                                variant="primary",
                            )
                        with gr.Accordion(
                            "Open for More: Geometry info", open=False
                        ):
                            geometry_result = gr.JSON(
                                label="Reconstructed Geometry"
                            )

                    # callbacks
                    match_image_src.change(
                        fn=self.ui_change_imagebox,
                        inputs=match_image_src,
                        outputs=input_image0,
                    )
                    match_image_src.change(
                        fn=self.ui_change_imagebox,
                        inputs=match_image_src,
                        outputs=input_image1,
                    )
                    # collect outputs
                    outputs = [
                        output_keypoints,
                        output_matches_raw,
                        output_matches_ransac,
                        matches_result_info,
                        matcher_info,
                        geometry_result,
                        output_wrapped,
                        state_cache,
                        output_pred,
                    ]
                    # button callbacks
                    button_run.click(fn=run_matching, inputs=inputs, outputs=outputs)
                    # Reset images. The order must match the tuple returned by
                    # ui_reset_state (the two input images appear twice: once
                    # for their value, once for the image-box update dict).
                    reset_outputs = [
                        input_image0,
                        input_image1,
                        match_setting_threshold,
                        match_setting_max_keypoints,
                        detect_keypoints_threshold,
                        matcher_list,
                        input_image0,
                        input_image1,
                        match_image_src,
                        output_keypoints,
                        output_matches_raw,
                        output_matches_ransac,
                        matches_result_info,
                        matcher_info,
                        output_wrapped,
                        geometry_result,
                        ransac_method,
                        ransac_reproj_threshold,
                        ransac_confidence,
                        ransac_max_iter,
                        choice_geometry_type,
                        output_pred,
                        image_force_resize_cb,
                    ]
                    button_reset.click(
                        fn=self.ui_reset_state,
                        inputs=None,
                        outputs=reset_outputs,
                    )

                    # run ransac button action
                    button_ransac.click(
                        fn=run_ransac,
                        inputs=[
                            state_cache,
                            choice_geometry_type,
                            ransac_method,
                            ransac_reproj_threshold,
                            ransac_confidence,
                            ransac_max_iter,
                        ],
                        outputs=[
                            output_matches_ransac,
                            matches_result_info,
                            output_wrapped,
                            output_pred,
                        ],
                    )

                    # send warped image to match
                    button_rerun.click(
                        fn=send_to_match,
                        inputs=[state_cache],
                        outputs=[input_image0, input_image1],
                    )

                    # estimate geo
                    choice_geometry_type.change(
                        fn=generate_warp_images,
                        inputs=[
                            input_image0,
                            input_image1,
                            geometry_result,
                            choice_geometry_type,
                        ],
                        outputs=[output_wrapped, geometry_result],
                    )
            # with gr.Tab("Structure from Motion(under-dev)"):
            #     sfm_ui = AppSfmUI(  # noqa: F841
            #         {
            #             **self.cfg,
            #             "matcher_zoo": self.matcher_zoo,
            #             "outputs": "experiments/sfm",
            #         }
            #     )
            #     sfm_ui.call_empty()

    def run(self):
        """Enable queuing and launch the app on the configured host and port."""
        self.app.queue().launch(
            server_name=self.server_name,
            server_port=self.server_port,
            share=False,
        )

    def ui_change_imagebox(self, choice):
        """
        Updates the image box with the given choice.

        Args:
            choice: The image source selected for the image box.

        Returns:
            dict: An update dictionary that clears the value and sets the
                image source(s) of the image box.
        """
        ret_dict = {
            "value": None,  # The updated value of the image box
            "__type__": "update",  # The type of update for the image box
        }
        # NOTE(review): this compares GRADIO_VERSION lexicographically —
        # presumably it is a version string; confirm in .utils.
        if GRADIO_VERSION > "3":
            return {
                **ret_dict,
                "sources": choice,  # The list of image sources to be displayed
            }
        else:
            return {
                **ret_dict,
                "source": choice,  # The list of image sources to be displayed
            }

    def _on_select_force_resize(self, visible: bool = False):
        """Return visibility updates for the width and height sliders."""
        return gr.update(visible=visible), gr.update(visible=visible)

    def ui_reset_state(
        self,
        *args: Any,
    ) -> Tuple[
        Optional[np.ndarray],  # image0
        Optional[np.ndarray],  # image1
        float,  # matching threshold
        int,  # max keypoints
        float,  # keypoint threshold
        str,  # matcher
        Dict[str, Any],  # input image0 update
        Dict[str, Any],  # input image1 update
        str,  # match_image_src
        Optional[np.ndarray],  # keypoints
        Optional[np.ndarray],  # raw matches
        Optional[np.ndarray],  # ransac matches
        Dict[str, Any],  # matches result info
        Dict[str, Any],  # matcher config
        Optional[np.ndarray],  # warped image
        Dict[str, Any],  # geometry result
        str,  # ransac_method
        float,  # ransac_reproj_threshold
        float,  # ransac_confidence
        int,  # ransac_max_iter
        str,  # geometry
        None,  # predictions
        bool,  # force resize
    ]:
        """
        Reset the state of the UI.

        Returns:
            tuple: The initial values for the UI state, one per component
                listed in ``reset_outputs`` in ``init_interface`` and in the
                same order.
        """
        key: str = list(self.matcher_zoo.keys())[
            0
        ]  # Get the first key from matcher_zoo
        # flush_logs()
        defaults = self.cfg["defaults"]
        return (
            None,  # image0: Optional[np.ndarray]
            None,  # image1: Optional[np.ndarray]
            defaults["match_threshold"],  # matching_threshold: float
            defaults["max_keypoints"],  # max_keypoints: int
            defaults["keypoint_threshold"],  # keypoint_threshold: float
            key,  # matcher: str
            self.ui_change_imagebox("upload"),  # input image0: Dict[str, Any]
            self.ui_change_imagebox("upload"),  # input image1: Dict[str, Any]
            "upload",  # match_image_src: str
            None,  # keypoints: Optional[np.ndarray]
            None,  # raw matches: Optional[np.ndarray]
            None,  # ransac matches: Optional[np.ndarray]
            {},  # matches result info: Dict[str, Any]
            {},  # matcher config: Dict[str, Any]
            None,  # warped image: Optional[np.ndarray]
            {},  # geometry result: Dict[str, Any]
            defaults["ransac_method"],  # ransac_method: str
            defaults["ransac_reproj_threshold"],  # ransac_reproj_threshold: float
            defaults["ransac_confidence"],  # ransac_confidence: float
            defaults["ransac_max_iter"],  # ransac_max_iter: int
            defaults["setting_geometry"],  # geometry: str
            None,  # predictions
            False,  # force resize checkbox
        )

    def display_supported_algorithms(self, style="tab"):
        """Render the list of displayable algorithms.

        Args:
            style: ``"md"`` renders a Markdown table with links, ``"tab"``
                renders a two-column ``gr.Dataframe``.

        Returns:
            The created Gradio component, or ``None`` for any other style.
        """

        def get_link(link, tag="Link"):
            return "[{}]({})".format(tag, link) if link is not None else "None"

        cfg = self.cfg["matcher_zoo"]
        if style == "md":
            markdown_table = "| Algo. | Conference | Code | Project | Paper |\n"
            markdown_table += "| ----- | ---------- | ---- | ------- | ----- |\n"

            for v in cfg.values():
                info = v["info"]
                # A missing "display" key means "show", as in the "tab" branch.
                if not info.get("display", True):
                    continue
                github_link = get_link(info["github"])
                project_link = get_link(info["project"])
                paper_link = get_link(
                    info["paper"],
                    (
                        Path(info["paper"]).name[-10:]
                        if info["paper"] is not None
                        else "Link"
                    ),
                )

                markdown_table += "{}|{}|{}|{}|{}\n".format(
                    info["name"],  # display name
                    info["source"],
                    github_link,
                    project_link,
                    paper_link,
                )
            return gr.Markdown(markdown_table)
        elif style == "tab":
            data = [
                [
                    v["info"]["name"],
                    v["info"]["source"],
                    # v["info"]["github"],
                    # v["info"]["paper"],
                    # v["info"]["project"],
                ]
                for v in cfg.values()
                if v["info"].get("display", True)
            ]
            tab = gr.Dataframe(
                # headers=["Algo.", "Conference", "Code", "Paper", "Project"],
                headers=["Algo.", "Conference"],
                # datatype=["str", "str", "str", "str", "str"],
                datatype=["str", "str"],
                # col_count=(5, "fixed"),
                col_count=(2, "fixed"),
                value=data,
                # wrap=True,
                # min_width = 1000,
                # height=1000,
            )
            return tab


class AppBaseUI:
    """Base class for UI sections holding config, inputs, outputs and widgets."""

    def __init__(self, cfg: Optional[Dict[str, Any]] = None):
        """Store ``cfg`` as an OmegaConf object and create empty containers.

        Args:
            cfg: Configuration mapping; an empty configuration is used when
                omitted.
        """
        # A fresh dict per call avoids sharing one mutable default object.
        self.cfg = OmegaConf.create({} if cfg is None else cfg)
        self.inputs = edict({})
        self.outputs = edict({})
        self.ui = edict({})

    def _init_ui(self):
        """Build the widgets; subclasses must override."""
        raise NotImplementedError

    def call(self, **kwargs):
        """Wire the callbacks; subclasses must override."""
        raise NotImplementedError

    def info(self):
        pass
        # gr.Info("SFM is under construction.")


class AppSfmUI(AppBaseUI):
    """Structure-from-motion settings panel (under development)."""

    def __init__(self, cfg: Optional[Dict[str, Any]] = None):
        """Build the panel; ``cfg`` must contain a ``matcher_zoo`` entry."""
        super().__init__(cfg)
        assert "matcher_zoo" in self.cfg
        self.matcher_zoo = self.cfg["matcher_zoo"]
        # self.sfm_engine = SfmEngine(cfg)
        self._init_ui()

    def init_retrieval_dropdown(self):
        """Return the names of all retrieval methods not disabled in the config."""
        return [
            name
            for name, entry in self.cfg["retrieval_zoo"].items()
            if entry.get("enable", True)
        ]

    def _update_options(self, option):
        """Return a visible textbox showing ``sparse``, ``dense`` or ``not set``."""
        if option == "sparse":
            return gr.Textbox("sparse", visible=True)
        elif option == "dense":
            return gr.Textbox("dense", visible=True)
        else:
            return gr.Textbox("not set", visible=True)

    def _on_select_custom_params(self, value: bool = False):
        """Return a visibility update driven by ``value``."""
        return gr.update(visible=value)

    def _init_ui(self):
        """Create the matching and scene-graph widgets in ``self.inputs``."""
        with gr.Row():
            # data setting and camera settings
            with gr.Column():
                # self.inputs.input_images = gr.File(
                #     label="SfM",
                #     interactive=True,
                #     file_count="multiple",
                #     min_width=300,
                # )
                # # camera setting
                # with gr.Accordion("Camera Settings", open=True):
                #     with gr.Column():
                #         with gr.Row():
                #             with gr.Column():
                #                 self.inputs.camera_model = gr.Dropdown(
                #                     choices=[
                #                         "PINHOLE",
                #                         "SIMPLE_RADIAL",
                #                         "OPENCV",
                #                     ],
                #                     value="PINHOLE",
                #                     label="Camera Model",
                #                     interactive=True,
                #                 )
                #             with gr.Column():
                #                 gr.Checkbox(
                #                     label="Shared Params",
                #                     value=True,
                #                     interactive=True,
                #                 )
                #                 camera_custom_params_cb = gr.Checkbox(
                #                     label="Custom Params",
                #                     value=False,
                #                     interactive=True,
                #                 )
                #         with gr.Row():
                #             self.inputs.camera_params = gr.Textbox(
                #                 label="Camera Params",
                #                 value="0,0,0,0",
                #                 interactive=False,
                #                 visible=False,
                #             )
                #         camera_custom_params_cb.select(
                #             fn=self._on_select_custom_params,
                #             inputs=camera_custom_params_cb,
                #             outputs=self.inputs.camera_params,
                #         )

                with gr.Accordion("Matching Settings", open=True):
                    # feature extraction and matching setting
                    with gr.Row():
                        # matcher setting
                        self.inputs.matcher_key = gr.Dropdown(
                            choices=self.matcher_zoo.keys(),
                            value="matchanything_eloftr",
                            label="Matching Model",
                            interactive=True,
                        )
                    with gr.Row():
                        with gr.Accordion("Advanced Settings", open=False):
                            with gr.Column():
                                with gr.Row():
                                    # matching setting
                                    self.inputs.max_keypoints = gr.Slider(
                                        label="Max Keypoints",
                                        minimum=100,
                                        maximum=10000,
                                        value=1000,
                                        interactive=True,
                                    )
                                    self.inputs.keypoint_threshold = gr.Slider(
                                        label="Keypoint Threshold",
                                        minimum=0,
                                        maximum=1,
                                        value=0.01,
                                    )
                                with gr.Row():
                                    self.inputs.match_threshold = gr.Slider(
                                        label="Match Threshold",
                                        minimum=0.01,
                                        maximum=12.0,
                                        value=0.2,
                                    )
                                    self.inputs.ransac_threshold = gr.Slider(
                                        label="Ransac Threshold",
                                        minimum=0.01,
                                        maximum=12.0,
                                        value=4.0,
                                        step=0.01,
                                        interactive=True,
                                    )

                                with gr.Row():
                                    self.inputs.ransac_confidence = gr.Slider(
                                        label="Ransac Confidence",
                                        minimum=0.01,
                                        maximum=1.0,
                                        value=0.9999,
                                        step=0.0001,
                                        interactive=True,
                                    )
                                    self.inputs.ransac_max_iter = gr.Slider(
                                        label="Ransac Max Iter",
                                        minimum=1,
                                        maximum=100,
                                        value=100,
                                        step=1,
                                        interactive=True,
                                    )
                with gr.Accordion("Scene Graph Settings", open=True):
                    # mapping setting
                    self.inputs.scene_graph = gr.Dropdown(
                        choices=["all", "swin", "oneref"],
                        value="all",
                        label="Scene Graph",
                        interactive=True,
                    )

                    # global feature setting
                    self.inputs.global_feature = gr.Dropdown(
                        choices=self.init_retrieval_dropdown(),
                        value="netvlad",
                        label="Global features",
                        interactive=True,
                    )
                    self.inputs.top_k = gr.Slider(
                        label="Number of Images per Image to Match",
                        minimum=1,
                        maximum=100,
                        value=10,
                        step=1,
                    )
                # button_match = gr.Button("Run Matching", variant="primary")

            # # mapping setting
            # with gr.Column():
            #     with gr.Accordion("Mapping Settings", open=True):
            #         with gr.Row():
            #             with gr.Accordion("Buddle Settings", open=True):
            #                 with gr.Row():
            #                     self.inputs.mapper_refine_focal_length = gr.Checkbox(
            #                         label="Refine Focal Length",
            #                         value=False,
            #                         interactive=True,
            #                     )
            #                     self.inputs.mapper_refine_principle_points = (
            #                         gr.Checkbox(
            #                             label="Refine Principle Points",
            #                             value=False,
            #                             interactive=True,
            #                         )
            #                     )
            #                     self.inputs.mapper_refine_extra_params = gr.Checkbox(
            #                         label="Refine Extra Params",
            #                         value=False,
            #                         interactive=True,
            #                     )
            #         with gr.Accordion("Retriangluation Settings", open=True):
            #             gr.Textbox(
            #                 label="Retriangluation Details",
            #             )
            #     self.ui.button_sfm = gr.Button("Run SFM", variant="primary")
            #     self.outputs.model_3d = gr.Model3D(
            #         interactive=True,
            #     )
            #     self.outputs.output_image = gr.Image(
            #         label="SFM Visualize",
            #         type="numpy",
            #         image_mode="RGB",
            #         interactive=False,
            #     )

    def call_empty(self):
        """Placeholder hook; currently wires nothing."""
        # self.ui.button_sfm.click(fn=self.info, inputs=[], outputs=[])
        pass

    def call(self):
        """Placeholder hook; the SfM callback wiring is disabled."""
        # self.ui.button_sfm.click(
        #     fn=self.sfm_engine.call,
        #     inputs=[
        #         self.inputs.matcher_key,
        #         self.inputs.input_images,  # images
        #         self.inputs.camera_model,
        #         self.inputs.camera_params,
        #         self.inputs.max_keypoints,
        #         self.inputs.keypoint_threshold,
        #         self.inputs.match_threshold,
        #         self.inputs.ransac_threshold,
        #         self.inputs.ransac_confidence,
        #         self.inputs.ransac_max_iter,
        #         self.inputs.scene_graph,
        #         self.inputs.global_feature,
        #         self.inputs.top_k,
        #         self.inputs.mapper_refine_focal_length,
        #         self.inputs.mapper_refine_principle_points,
        #         self.inputs.mapper_refine_extra_params,
        #     ],
        #     outputs=[self.outputs.model_3d, self.outputs.output_image],
        # )
        pass