<?xml version="1.0" encoding="UTF-8"?>
<rdf:RDF xmlns="http://purl.org/rss/1.0/"
 xmlns:dc="http://purl.org/dc/elements/1.1/"
 xmlns:dcterms="http://purl.org/dc/terms/"
 xmlns:cc="http://web.resource.org/cc/"
 xmlns:prism="http://prismstandard.org/namespaces/basic/2.0/"
 xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
 xmlns:admin="http://webns.net/mvcb/"
 xmlns:content="http://purl.org/rss/1.0/modules/content/">
    <channel rdf:about="https://www.mdpi.com/rss/journal/jimaging">
		<title>Journal of Imaging</title>
		<description>Latest open access articles published in J. Imaging at https://www.mdpi.com/journal/jimaging</description>
		<link>https://www.mdpi.com/journal/jimaging</link>
		<admin:generatorAgent rdf:resource="https://www.mdpi.com/journal/jimaging"/>
		<admin:errorReportsTo rdf:resource="mailto:support@mdpi.com"/>
		<dc:publisher>MDPI</dc:publisher>
		<dc:language>en</dc:language>
		<dc:rights>Creative Commons Attribution (CC-BY)</dc:rights>
						<prism:copyright>MDPI</prism:copyright>
		<prism:rightsAgent>support@mdpi.com</prism:rightsAgent>
		<image rdf:resource="https://pub.mdpi-res.com/img/design/mdpi-pub-logo.png?13cf3b5bd783e021?1772794056"/>
				<items>
			<rdf:Seq>
            				<rdf:li rdf:resource="https://www.mdpi.com/2313-433X/12/3/112" />
            				<rdf:li rdf:resource="https://www.mdpi.com/2313-433X/12/3/111" />
            				<rdf:li rdf:resource="https://www.mdpi.com/2313-433X/12/3/110" />
            				<rdf:li rdf:resource="https://www.mdpi.com/2313-433X/12/3/109" />
            				<rdf:li rdf:resource="https://www.mdpi.com/2313-433X/12/3/108" />
            				<rdf:li rdf:resource="https://www.mdpi.com/2313-433X/12/3/107" />
            				<rdf:li rdf:resource="https://www.mdpi.com/2313-433X/12/3/106" />
            				<rdf:li rdf:resource="https://www.mdpi.com/2313-433X/12/3/105" />
            				<rdf:li rdf:resource="https://www.mdpi.com/2313-433X/12/3/104" />
            				<rdf:li rdf:resource="https://www.mdpi.com/2313-433X/12/3/103" />
            				<rdf:li rdf:resource="https://www.mdpi.com/2313-433X/12/3/102" />
            				<rdf:li rdf:resource="https://www.mdpi.com/2313-433X/12/3/101" />
            				<rdf:li rdf:resource="https://www.mdpi.com/2313-433X/12/3/100" />
            				<rdf:li rdf:resource="https://www.mdpi.com/2313-433X/12/3/99" />
            				<rdf:li rdf:resource="https://www.mdpi.com/2313-433X/12/3/98" />
            				<rdf:li rdf:resource="https://www.mdpi.com/2313-433X/12/3/97" />
            				<rdf:li rdf:resource="https://www.mdpi.com/2313-433X/12/3/96" />
            				<rdf:li rdf:resource="https://www.mdpi.com/2313-433X/12/3/95" />
            				<rdf:li rdf:resource="https://www.mdpi.com/2313-433X/12/3/94" />
            				<rdf:li rdf:resource="https://www.mdpi.com/2313-433X/12/3/93" />
            				<rdf:li rdf:resource="https://www.mdpi.com/2313-433X/12/3/92" />
            				<rdf:li rdf:resource="https://www.mdpi.com/2313-433X/12/2/91" />
            				<rdf:li rdf:resource="https://www.mdpi.com/2313-433X/12/2/90" />
            				<rdf:li rdf:resource="https://www.mdpi.com/2313-433X/12/2/89" />
            				<rdf:li rdf:resource="https://www.mdpi.com/2313-433X/12/2/88" />
            				<rdf:li rdf:resource="https://www.mdpi.com/2313-433X/12/2/87" />
            				<rdf:li rdf:resource="https://www.mdpi.com/2313-433X/12/2/85" />
            				<rdf:li rdf:resource="https://www.mdpi.com/2313-433X/12/2/86" />
            				<rdf:li rdf:resource="https://www.mdpi.com/2313-433X/12/2/84" />
            				<rdf:li rdf:resource="https://www.mdpi.com/2313-433X/12/2/83" />
            				<rdf:li rdf:resource="https://www.mdpi.com/2313-433X/12/2/82" />
            				<rdf:li rdf:resource="https://www.mdpi.com/2313-433X/12/2/81" />
            				<rdf:li rdf:resource="https://www.mdpi.com/2313-433X/12/2/80" />
            				<rdf:li rdf:resource="https://www.mdpi.com/2313-433X/12/2/79" />
            				<rdf:li rdf:resource="https://www.mdpi.com/2313-433X/12/2/78" />
            				<rdf:li rdf:resource="https://www.mdpi.com/2313-433X/12/2/77" />
            				<rdf:li rdf:resource="https://www.mdpi.com/2313-433X/12/2/76" />
            				<rdf:li rdf:resource="https://www.mdpi.com/2313-433X/12/2/75" />
            				<rdf:li rdf:resource="https://www.mdpi.com/2313-433X/12/2/74" />
            				<rdf:li rdf:resource="https://www.mdpi.com/2313-433X/12/2/73" />
            				<rdf:li rdf:resource="https://www.mdpi.com/2313-433X/12/2/72" />
            				<rdf:li rdf:resource="https://www.mdpi.com/2313-433X/12/2/71" />
            				<rdf:li rdf:resource="https://www.mdpi.com/2313-433X/12/2/70" />
            				<rdf:li rdf:resource="https://www.mdpi.com/2313-433X/12/2/69" />
            				<rdf:li rdf:resource="https://www.mdpi.com/2313-433X/12/2/68" />
            				<rdf:li rdf:resource="https://www.mdpi.com/2313-433X/12/2/67" />
            				<rdf:li rdf:resource="https://www.mdpi.com/2313-433X/12/2/66" />
            				<rdf:li rdf:resource="https://www.mdpi.com/2313-433X/12/2/65" />
            				<rdf:li rdf:resource="https://www.mdpi.com/2313-433X/12/2/64" />
            				<rdf:li rdf:resource="https://www.mdpi.com/2313-433X/12/2/63" />
            				<rdf:li rdf:resource="https://www.mdpi.com/2313-433X/12/2/62" />
            				<rdf:li rdf:resource="https://www.mdpi.com/2313-433X/12/2/61" />
            				<rdf:li rdf:resource="https://www.mdpi.com/2313-433X/12/2/60" />
            				<rdf:li rdf:resource="https://www.mdpi.com/2313-433X/12/2/59" />
            				<rdf:li rdf:resource="https://www.mdpi.com/2313-433X/12/2/58" />
            				<rdf:li rdf:resource="https://www.mdpi.com/2313-433X/12/2/57" />
            				<rdf:li rdf:resource="https://www.mdpi.com/2313-433X/12/2/56" />
            				<rdf:li rdf:resource="https://www.mdpi.com/2313-433X/12/2/55" />
            				<rdf:li rdf:resource="https://www.mdpi.com/2313-433X/12/2/54" />
            				<rdf:li rdf:resource="https://www.mdpi.com/2313-433X/12/1/53" />
            				<rdf:li rdf:resource="https://www.mdpi.com/2313-433X/12/1/52" />
            				<rdf:li rdf:resource="https://www.mdpi.com/2313-433X/12/1/51" />
            				<rdf:li rdf:resource="https://www.mdpi.com/2313-433X/12/1/50" />
            				<rdf:li rdf:resource="https://www.mdpi.com/2313-433X/12/1/49" />
            				<rdf:li rdf:resource="https://www.mdpi.com/2313-433X/12/1/48" />
            				<rdf:li rdf:resource="https://www.mdpi.com/2313-433X/12/1/47" />
            				<rdf:li rdf:resource="https://www.mdpi.com/2313-433X/12/1/46" />
            				<rdf:li rdf:resource="https://www.mdpi.com/2313-433X/12/1/45" />
            				<rdf:li rdf:resource="https://www.mdpi.com/2313-433X/12/1/44" />
            				<rdf:li rdf:resource="https://www.mdpi.com/2313-433X/12/1/43" />
            				<rdf:li rdf:resource="https://www.mdpi.com/2313-433X/12/1/42" />
            				<rdf:li rdf:resource="https://www.mdpi.com/2313-433X/12/1/41" />
            				<rdf:li rdf:resource="https://www.mdpi.com/2313-433X/12/1/40" />
            				<rdf:li rdf:resource="https://www.mdpi.com/2313-433X/12/1/39" />
            				<rdf:li rdf:resource="https://www.mdpi.com/2313-433X/12/1/38" />
            				<rdf:li rdf:resource="https://www.mdpi.com/2313-433X/12/1/37" />
            				<rdf:li rdf:resource="https://www.mdpi.com/2313-433X/12/1/36" />
            				<rdf:li rdf:resource="https://www.mdpi.com/2313-433X/12/1/35" />
            				<rdf:li rdf:resource="https://www.mdpi.com/2313-433X/12/1/34" />
            				<rdf:li rdf:resource="https://www.mdpi.com/2313-433X/12/1/33" />
            				<rdf:li rdf:resource="https://www.mdpi.com/2313-433X/12/1/31" />
            				<rdf:li rdf:resource="https://www.mdpi.com/2313-433X/12/1/30" />
            				<rdf:li rdf:resource="https://www.mdpi.com/2313-433X/12/1/32" />
            				<rdf:li rdf:resource="https://www.mdpi.com/2313-433X/12/1/29" />
            				<rdf:li rdf:resource="https://www.mdpi.com/2313-433X/12/1/28" />
            				<rdf:li rdf:resource="https://www.mdpi.com/2313-433X/12/1/27" />
            				<rdf:li rdf:resource="https://www.mdpi.com/2313-433X/12/1/26" />
            				<rdf:li rdf:resource="https://www.mdpi.com/2313-433X/12/1/25" />
            				<rdf:li rdf:resource="https://www.mdpi.com/2313-433X/12/1/24" />
            				<rdf:li rdf:resource="https://www.mdpi.com/2313-433X/12/1/23" />
            				<rdf:li rdf:resource="https://www.mdpi.com/2313-433X/12/1/22" />
            				<rdf:li rdf:resource="https://www.mdpi.com/2313-433X/12/1/21" />
            				<rdf:li rdf:resource="https://www.mdpi.com/2313-433X/12/1/20" />
            				<rdf:li rdf:resource="https://www.mdpi.com/2313-433X/12/1/19" />
            				<rdf:li rdf:resource="https://www.mdpi.com/2313-433X/12/1/18" />
            				<rdf:li rdf:resource="https://www.mdpi.com/2313-433X/12/1/17" />
            				<rdf:li rdf:resource="https://www.mdpi.com/2313-433X/12/1/16" />
            				<rdf:li rdf:resource="https://www.mdpi.com/2313-433X/12/1/15" />
            				<rdf:li rdf:resource="https://www.mdpi.com/2313-433X/12/1/14" />
            				<rdf:li rdf:resource="https://www.mdpi.com/2313-433X/12/1/13" />
                    	</rdf:Seq>
		</items>
				<cc:license rdf:resource="https://creativecommons.org/licenses/by/4.0/" />
	</channel>

        <item rdf:about="https://www.mdpi.com/2313-433X/12/3/112">

	<title>J. Imaging, Vol. 12, Pages 112: Endo-DET: A Domain-Specific Detection Framework for Multi-Class Endoscopic Disease Detection</title>
	<link>https://www.mdpi.com/2313-433X/12/3/112</link>
	<description>Gastrointestinal cancers account for roughly a quarter of global cancer incidence, and early detection through endoscopy has proven effective in reducing mortality. Multi-class endoscopic disease detection, however, faces three persistent challenges: feature redundancy from non-pathological content, severe illumination inconsistency across imaging modalities, and extreme scale variability with blurry boundaries. This paper introduces Endo-DET, a domain-specific detection framework addressing these challenges through three synergistic components. The Adaptive Lesion-Discriminative Filtering (ALDF) module achieves lesion-focused attention via sparse simplex projection, reducing complexity from O(N²) to O(αN²). The Global–Local Illumination Modulation Neck (GLIM-Neck) enables illumination-aware multi-scale fusion through four cooperative mechanisms, maintaining stable performance across white-light endoscopy, narrow-band imaging, and chromoendoscopy. The Lesion-aware Unified Calibration and Illumination-robust Discrimination (LUCID) module uses dual-stream reciprocal modulation to integrate boundary-sensitive textures with global semantics while suppressing instrument artifacts. Experiments on EDD2020, Kvasir-SEG, PolypGen2021, and CVC-ClinicDB show that Endo-DET improves mAP50-95 over the DEIM baseline by 5.8, 10.8, 4.1, and 10.1 percentage points respectively, with mAP75 gains of 6.1, 10.3, 6.8, and 9.3 points, and Recall50-95 improvements of 10.9, 12.1, 11.1, and 11.5 points. Running at 330 FPS with TensorRT FP16 optimization, Endo-DET achieves consistent cross-dataset improvements while maintaining real-time capability, providing a methodological foundation for clinical computer-aided diagnosis.</description>
	<pubDate>2026-03-06</pubDate>

	<content:encoded><![CDATA[
	<p><b>J. Imaging, Vol. 12, Pages 112: Endo-DET: A Domain-Specific Detection Framework for Multi-Class Endoscopic Disease Detection</b></p>
	<p>Journal of Imaging <a href="https://www.mdpi.com/2313-433X/12/3/112">doi: 10.3390/jimaging12030112</a></p>
	<p>Authors:
		Yijie Lu
		Yixiang Zhao
		Qiang Yu
		Wei Shao
		Renbin Shen
		</p>
	<p>Gastrointestinal cancers account for roughly a quarter of global cancer incidence, and early detection through endoscopy has proven effective in reducing mortality. Multi-class endoscopic disease detection, however, faces three persistent challenges: feature redundancy from non-pathological content, severe illumination inconsistency across imaging modalities, and extreme scale variability with blurry boundaries. This paper introduces Endo-DET, a domain-specific detection framework addressing these challenges through three synergistic components. The Adaptive Lesion-Discriminative Filtering (ALDF) module achieves lesion-focused attention via sparse simplex projection, reducing complexity from O(N²) to O(αN²). The Global–Local Illumination Modulation Neck (GLIM-Neck) enables illumination-aware multi-scale fusion through four cooperative mechanisms, maintaining stable performance across white-light endoscopy, narrow-band imaging, and chromoendoscopy. The Lesion-aware Unified Calibration and Illumination-robust Discrimination (LUCID) module uses dual-stream reciprocal modulation to integrate boundary-sensitive textures with global semantics while suppressing instrument artifacts. Experiments on EDD2020, Kvasir-SEG, PolypGen2021, and CVC-ClinicDB show that Endo-DET improves mAP50-95 over the DEIM baseline by 5.8, 10.8, 4.1, and 10.1 percentage points respectively, with mAP75 gains of 6.1, 10.3, 6.8, and 9.3 points, and Recall50-95 improvements of 10.9, 12.1, 11.1, and 11.5 points. Running at 330 FPS with TensorRT FP16 optimization, Endo-DET achieves consistent cross-dataset improvements while maintaining real-time capability, providing a methodological foundation for clinical computer-aided diagnosis.</p>
	]]></content:encoded>

	<dc:title>Endo-DET: A Domain-Specific Detection Framework for Multi-Class Endoscopic Disease Detection</dc:title>
			<dc:creator>Yijie Lu</dc:creator>
			<dc:creator>Yixiang Zhao</dc:creator>
			<dc:creator>Qiang Yu</dc:creator>
			<dc:creator>Wei Shao</dc:creator>
			<dc:creator>Renbin Shen</dc:creator>
		<dc:identifier>doi: 10.3390/jimaging12030112</dc:identifier>
	<dc:source>Journal of Imaging</dc:source>
	<dc:date>2026-03-06</dc:date>

	<prism:publicationName>Journal of Imaging</prism:publicationName>
	<prism:publicationDate>2026-03-06</prism:publicationDate>
	<prism:volume>12</prism:volume>
	<prism:number>3</prism:number>
	<prism:section>Article</prism:section>
	<prism:startingPage>112</prism:startingPage>
		<prism:doi>10.3390/jimaging12030112</prism:doi>
	<prism:url>https://www.mdpi.com/2313-433X/12/3/112</prism:url>
	
	<cc:license rdf:resource="CC BY 4.0"/>
</item>
        <item rdf:about="https://www.mdpi.com/2313-433X/12/3/111">

	<title>J. Imaging, Vol. 12, Pages 111: Evidence-Guided Diagnostic Reasoning for Pediatric Chest Radiology Based on Multimodal Large Language Models</title>
	<link>https://www.mdpi.com/2313-433X/12/3/111</link>
	<description>Pediatric respiratory diseases are a leading cause of hospital admissions and childhood mortality worldwide, highlighting the critical need for accurate and timely diagnosis to support effective treatment and long-term care. Chest radiography remains the most widely used imaging modality for pediatric pulmonary assessment. Consequently, reliable AI-assisted diagnostic methods are essential for alleviating the workload of clinical radiologists. However, most existing deep learning-based approaches are data-driven and formulate diagnosis as a black-box image classification task, resulting in limited interpretability and reduced clinical trustworthiness. To address these challenges, we propose a trustworthy two-stage diagnostic paradigm for pediatric chest X-ray diagnosis that closely aligns with the radiological workflow in clinical practice, in which the diagnosis procedure is constrained by evidence. In the first stage, a vision–language model fine-tuned on pediatric data identifies radiological findings from chest radiographs, producing structured and interpretable diagnostic evidence. In the second stage, a multimodal large language model integrates the radiograph, extracted findings, patient demographic information, and external medical domain knowledge with a RAG mechanism to generate the final diagnosis. Experiments conducted on the VinDr-PCXR dataset demonstrate that our method achieves 90.1% diagnostic accuracy, 70.9% F1-score, and 82.5% AUC, representing up to a 13.1% increase in diagnosis accuracy over the state-of-the-art baselines. These results validate the effectiveness of combining multimodal reasoning with explicit medical evidence and domain knowledge, and indicate the strong potential of the proposed approach for trustworthy pediatric radiology diagnosis.</description>
	<pubDate>2026-03-06</pubDate>

	<content:encoded><![CDATA[
	<p><b>J. Imaging, Vol. 12, Pages 111: Evidence-Guided Diagnostic Reasoning for Pediatric Chest Radiology Based on Multimodal Large Language Models</b></p>
	<p>Journal of Imaging <a href="https://www.mdpi.com/2313-433X/12/3/111">doi: 10.3390/jimaging12030111</a></p>
	<p>Authors:
		Yuze Zhao
		Qing Wang
		Yingwen Wang
		Ruiwei Zhao
		Rui Feng
		Xiaobo Zhang
		</p>
	<p>Pediatric respiratory diseases are a leading cause of hospital admissions and childhood mortality worldwide, highlighting the critical need for accurate and timely diagnosis to support effective treatment and long-term care. Chest radiography remains the most widely used imaging modality for pediatric pulmonary assessment. Consequently, reliable AI-assisted diagnostic methods are essential for alleviating the workload of clinical radiologists. However, most existing deep learning-based approaches are data-driven and formulate diagnosis as a black-box image classification task, resulting in limited interpretability and reduced clinical trustworthiness. To address these challenges, we propose a trustworthy two-stage diagnostic paradigm for pediatric chest X-ray diagnosis that closely aligns with the radiological workflow in clinical practice, in which the diagnosis procedure is constrained by evidence. In the first stage, a vision–language model fine-tuned on pediatric data identifies radiological findings from chest radiographs, producing structured and interpretable diagnostic evidence. In the second stage, a multimodal large language model integrates the radiograph, extracted findings, patient demographic information, and external medical domain knowledge with a RAG mechanism to generate the final diagnosis. Experiments conducted on the VinDr-PCXR dataset demonstrate that our method achieves 90.1% diagnostic accuracy, 70.9% F1-score, and 82.5% AUC, representing up to a 13.1% increase in diagnosis accuracy over the state-of-the-art baselines. These results validate the effectiveness of combining multimodal reasoning with explicit medical evidence and domain knowledge, and indicate the strong potential of the proposed approach for trustworthy pediatric radiology diagnosis.</p>
	]]></content:encoded>

	<dc:title>Evidence-Guided Diagnostic Reasoning for Pediatric Chest Radiology Based on Multimodal Large Language Models</dc:title>
			<dc:creator>Yuze Zhao</dc:creator>
			<dc:creator>Qing Wang</dc:creator>
			<dc:creator>Yingwen Wang</dc:creator>
			<dc:creator>Ruiwei Zhao</dc:creator>
			<dc:creator>Rui Feng</dc:creator>
			<dc:creator>Xiaobo Zhang</dc:creator>
		<dc:identifier>doi: 10.3390/jimaging12030111</dc:identifier>
	<dc:source>Journal of Imaging</dc:source>
	<dc:date>2026-03-06</dc:date>

	<prism:publicationName>Journal of Imaging</prism:publicationName>
	<prism:publicationDate>2026-03-06</prism:publicationDate>
	<prism:volume>12</prism:volume>
	<prism:number>3</prism:number>
	<prism:section>Article</prism:section>
	<prism:startingPage>111</prism:startingPage>
		<prism:doi>10.3390/jimaging12030111</prism:doi>
	<prism:url>https://www.mdpi.com/2313-433X/12/3/111</prism:url>
	
	<cc:license rdf:resource="CC BY 4.0"/>
</item>
        <item rdf:about="https://www.mdpi.com/2313-433X/12/3/110">

	<title>J. Imaging, Vol. 12, Pages 110: Forensic Analysis for Source Camera Identification from EXIF Metadata</title>
	<link>https://www.mdpi.com/2313-433X/12/3/110</link>
	<description>Source camera identification on smartphones constitutes a fundamental task in multimedia forensics, providing essential support for applications such as image copyright protection, illegal content tracking, and digital evidence verification. Numerous techniques have been developed for this task over the past decades. Among existing approaches, Photo-Response Non-Uniformity (PRNU) has been widely recognized as a reliable device-specific fingerprint and has demonstrated remarkable performance in real-world applications. Nevertheless, the rapid advancement of computational photography technologies has introduced significant challenges: modern devices often exhibit anomalous behaviors under PRNU-based analysis. For instance, images captured by different devices may exhibit unexpected correlations, while images captured by the same device can vary substantially in their PRNU patterns. Current approaches are incapable of automatically exploring the underlying causes of these anomalous behaviors. To address this limitation, we propose a simple yet effective forensic analysis framework leveraging Exchangeable Image File Format (EXIF) metadata. Specifically, we represent EXIF metadata as type-aware word embeddings to preserve contextual information across tags. This design enables visual interpretation of the model’s decision-making process and provides complementary insights for identifying the anomalous behaviors observed in modern devices. Extensive experiments conducted on three public benchmark datasets demonstrate that the proposed method not only achieves state-of-the-art performance for source camera identification but also provides valuable insights into anomalous device behaviors.</description>
	<pubDate>2026-03-04</pubDate>

	<content:encoded><![CDATA[
	<p><b>J. Imaging, Vol. 12, Pages 110: Forensic Analysis for Source Camera Identification from EXIF Metadata</b></p>
	<p>Journal of Imaging <a href="https://www.mdpi.com/2313-433X/12/3/110">doi: 10.3390/jimaging12030110</a></p>
	<p>Authors:
		Pengpeng Yang
		Chen Zhou
		Daniele Baracchi
		Dasara Shullani
		Yaobin Zou
		Alessandro Piva
		</p>
	<p>Source camera identification on smartphones constitutes a fundamental task in multimedia forensics, providing essential support for applications such as image copyright protection, illegal content tracking, and digital evidence verification. Numerous techniques have been developed for this task over the past decades. Among existing approaches, Photo-Response Non-Uniformity (PRNU) has been widely recognized as a reliable device-specific fingerprint and has demonstrated remarkable performance in real-world applications. Nevertheless, the rapid advancement of computational photography technologies has introduced significant challenges: modern devices often exhibit anomalous behaviors under PRNU-based analysis. For instance, images captured by different devices may exhibit unexpected correlations, while images captured by the same device can vary substantially in their PRNU patterns. Current approaches are incapable of automatically exploring the underlying causes of these anomalous behaviors. To address this limitation, we propose a simple yet effective forensic analysis framework leveraging Exchangeable Image File Format (EXIF) metadata. Specifically, we represent EXIF metadata as type-aware word embeddings to preserve contextual information across tags. This design enables visual interpretation of the model’s decision-making process and provides complementary insights for identifying the anomalous behaviors observed in modern devices. Extensive experiments conducted on three public benchmark datasets demonstrate that the proposed method not only achieves state-of-the-art performance for source camera identification but also provides valuable insights into anomalous device behaviors.</p>
	]]></content:encoded>

	<dc:title>Forensic Analysis for Source Camera Identification from EXIF Metadata</dc:title>
			<dc:creator>Pengpeng Yang</dc:creator>
			<dc:creator>Chen Zhou</dc:creator>
			<dc:creator>Daniele Baracchi</dc:creator>
			<dc:creator>Dasara Shullani</dc:creator>
			<dc:creator>Yaobin Zou</dc:creator>
			<dc:creator>Alessandro Piva</dc:creator>
		<dc:identifier>doi: 10.3390/jimaging12030110</dc:identifier>
	<dc:source>Journal of Imaging</dc:source>
	<dc:date>2026-03-04</dc:date>

	<prism:publicationName>Journal of Imaging</prism:publicationName>
	<prism:publicationDate>2026-03-04</prism:publicationDate>
	<prism:volume>12</prism:volume>
	<prism:number>3</prism:number>
	<prism:section>Article</prism:section>
	<prism:startingPage>110</prism:startingPage>
		<prism:doi>10.3390/jimaging12030110</prism:doi>
	<prism:url>https://www.mdpi.com/2313-433X/12/3/110</prism:url>
	
	<cc:license rdf:resource="CC BY 4.0"/>
</item>
        <item rdf:about="https://www.mdpi.com/2313-433X/12/3/109">

	<title>J. Imaging, Vol. 12, Pages 109: A Hierarchical Multi-View Deep Learning Framework for Autism Classification Using Structural and Functional MRI</title>
	<link>https://www.mdpi.com/2313-433X/12/3/109</link>
	<description>Autism classification is challenging due to the subtle, heterogeneous, and overlapping neural activation profiles that occur in individuals with autism. Novel deep learning approaches, such as Convolutional Neural Networks (CNNs) and their variants, as well as Transformers, have shown moderate performance in discriminating between autism and normal cohorts; yet, they often struggle to jointly capture the spatial–structural and temporal–functional variations present in autistic brains. To overcome these shortcomings, we propose a novel hierarchical deep learning framework that extracts the inherent spatial dependencies from the dual-modal MRI scans. For sMRI, we develop a 3D Hierarchical Convolutional Neural Network to capture both fine and coarse anatomical structures via multi-view projections along the axial, sagittal, and coronal planes. For the fMRI case, we introduced a bidirectional LSTM-based temporal encoder to examine regional brain dynamics and functional connectivity. The sequential embeddings and correlations are combined into a unified spatiotemporal representation of functional imaging, which is then classified using a multilayer perceptron to ensure continuity in diagnostic predictions across the examined modalities. Finally, a cross-modality fusion scheme was employed to integrate feature representations of both modalities. Extensive evaluations on the ABIDE I dataset (NYU repository) demonstrate that our proposed framework outperforms existing baselines, including Vision/Swin Transformers and various newly developed CNN variants. For the sMRI branch, we achieved 90.19 ± 0.12% accuracy (precision: 90.85 ± 0.16%, recall: 89.27 ± 0.19%, F1-score: 90.05 ± 0.14%, and focal loss: 0.3982). For the fMRI branch, we achieved an accuracy of 88.93 ± 0.15% (precision: 89.78 ± 0.18%, recall: 88.29 ± 0.20%, F1-score: 89.03 ± 0.17%, and focal loss of 0.4437). These outcomes affirm the superior generalization and robustness of the proposed framework for integrating structural and functional brain representations to achieve accurate autism classification.</description>
	<pubDate>2026-03-04</pubDate>

	<content:encoded><![CDATA[
	<p><b>J. Imaging, Vol. 12, Pages 109: A Hierarchical Multi-View Deep Learning Framework for Autism Classification Using Structural and Functional MRI</b></p>
	<p>Journal of Imaging <a href="https://www.mdpi.com/2313-433X/12/3/109">doi: 10.3390/jimaging12030109</a></p>
	<p>Authors:
		Nayif Mohammed Hammash
		Mohammed Chachan Younis
		</p>
	<p>Autism classification is challenging due to the subtle, heterogeneous, and overlapping neural activation profiles that occur in individuals with autism. Novel deep learning approaches, such as Convolutional Neural Networks (CNNs) and their variants, as well as Transformers, have shown moderate performance in discriminating between autism and normal cohorts; yet, they often struggle to jointly capture the spatial–structural and temporal–functional variations present in autistic brains. To overcome these shortcomings, we propose a novel hierarchical deep learning framework that extracts the inherent spatial dependencies from the dual-modal MRI scans. For sMRI, we develop a 3D Hierarchical Convolutional Neural Network to capture both fine and coarse anatomical structures via multi-view projections along the axial, sagittal, and coronal planes. For the fMRI case, we introduced a bidirectional LSTM-based temporal encoder to examine regional brain dynamics and functional connectivity. The sequential embeddings and correlations are combined into a unified spatiotemporal representation of functional imaging, which is then classified using a multilayer perceptron to ensure continuity in diagnostic predictions across the examined modalities. Finally, a cross-modality fusion scheme was employed to integrate feature representations of both modalities. Extensive evaluations on the ABIDE I dataset (NYU repository) demonstrate that our proposed framework outperforms existing baselines, including Vision/Swin Transformers and various newly developed CNN variants. For the sMRI branch, we achieved 90.19 ± 0.12% accuracy (precision: 90.85 ± 0.16%, recall: 89.27 ± 0.19%, F1-score: 90.05 ± 0.14%, and focal loss: 0.3982). For the fMRI branch, we achieved an accuracy of 88.93 ± 0.15% (precision: 89.78 ± 0.18%, recall: 88.29 ± 0.20%, F1-score: 89.03 ± 0.17%, and focal loss of 0.4437). These outcomes affirm the superior generalization and robustness of the proposed framework for integrating structural and functional brain representations to achieve accurate autism classification.</p>
	]]></content:encoded>

	<dc:title>A Hierarchical Multi-View Deep Learning Framework for Autism Classification Using Structural and Functional MRI</dc:title>
			<dc:creator>Nayif Mohammed Hammash</dc:creator>
			<dc:creator>Mohammed Chachan Younis</dc:creator>
		<dc:identifier>doi: 10.3390/jimaging12030109</dc:identifier>
	<dc:source>Journal of Imaging</dc:source>
	<dc:date>2026-03-04</dc:date>

	<prism:publicationName>Journal of Imaging</prism:publicationName>
	<prism:publicationDate>2026-03-04</prism:publicationDate>
	<prism:volume>12</prism:volume>
	<prism:number>3</prism:number>
	<prism:section>Article</prism:section>
	<prism:startingPage>109</prism:startingPage>
		<prism:doi>10.3390/jimaging12030109</prism:doi>
	<prism:url>https://www.mdpi.com/2313-433X/12/3/109</prism:url>
	
	<cc:license rdf:resource="CC BY 4.0"/>
</item>
        <item rdf:about="https://www.mdpi.com/2313-433X/12/3/108">

	<title>J. Imaging, Vol. 12, Pages 108: Optimizing Radiographic Diagnosis Through Signal-Balanced Convolutional Models</title>
	<link>https://www.mdpi.com/2313-433X/12/3/108</link>
	<description>Accurate interpretation of chest radiographs is central to the early diagnosis and management of pulmonary disorders. This study introduces an explainable deep learning framework that integrates biomedical signal fidelity analysis with transfer learning to enhance diagnostic reliability and transparency. Using the publicly available COVID-19 Radiography Dataset (21,165 chest X-ray images across four classes: COVID-19, Viral Pneumonia, Lung Opacity, and Normal), three architectures, namely baseline Convolutional Neural Network (CNN), ResNet-50, and EfficientNetB3, were trained and evaluated under varied class-balancing and hyperparameter configurations. Signal preservation was quantitatively verified using the Structural Similarity Index Measure (SSIM = 0.93 ± 0.02), ensuring that preprocessing retained key diagnostic features. Among all models, ResNet-50 achieved the highest classification accuracy (93.7%) and macro-AUC = 0.97 (class-balanced), whereas EfficientNetB3 demonstrated superior generalization with reduced parameter overhead. Gradient-weighted Class Activation Mapping (Grad-CAM) visualizations confirmed anatomically coherent activations aligned with pathological lung regions, substantiating clinical interpretability. The integration of signal fidelity metrics with explainable deep learning presents a reproducible and computationally efficient framework for medical image analysis. These findings highlight the potential of signal-aware transfer learning to support reliable, transparent, and resource-efficient diagnostic decision-making in radiology and other imaging-based medical domains.</description>
	<pubDate>2026-03-04</pubDate>

	<content:encoded><![CDATA[
	<p><b>J. Imaging, Vol. 12, Pages 108: Optimizing Radiographic Diagnosis Through Signal-Balanced Convolutional Models</b></p>
	<p>Journal of Imaging <a href="https://www.mdpi.com/2313-433X/12/3/108">doi: 10.3390/jimaging12030108</a></p>
	<p>Authors:
		Sakina Juzar Neemuchwala
		Raja Hashim Ali
		Qamar Abbas
		Talha Ali Khan
		Ambreen Shahnaz
		Iftikhar Ahmed
		</p>
	<p>Accurate interpretation of chest radiographs is central to the early diagnosis and management of pulmonary disorders. This study introduces an explainable deep learning framework that integrates biomedical signal fidelity analysis with transfer learning to enhance diagnostic reliability and transparency. Using the publicly available COVID-19 Radiography Dataset (21,165 chest X-ray images across four classes: COVID-19, Viral Pneumonia, Lung Opacity, and Normal), three architectures, namely baseline Convolutional Neural Network (CNN), ResNet-50, and EfficientNetB3, were trained and evaluated under varied class-balancing and hyperparameter configurations. Signal preservation was quantitatively verified using the Structural Similarity Index Measure (SSIM = 0.93 ± 0.02), ensuring that preprocessing retained key diagnostic features. Among all models, ResNet-50 achieved the highest classification accuracy (93.7%) and macro-AUC = 0.97 (class-balanced), whereas EfficientNetB3 demonstrated superior generalization with reduced parameter overhead. Gradient-weighted Class Activation Mapping (Grad-CAM) visualizations confirmed anatomically coherent activations aligned with pathological lung regions, substantiating clinical interpretability. The integration of signal fidelity metrics with explainable deep learning presents a reproducible and computationally efficient framework for medical image analysis. These findings highlight the potential of signal-aware transfer learning to support reliable, transparent, and resource-efficient diagnostic decision-making in radiology and other imaging-based medical domains.</p>
	]]></content:encoded>

	<dc:title>Optimizing Radiographic Diagnosis Through Signal-Balanced Convolutional Models</dc:title>
			<dc:creator>Sakina Juzar Neemuchwala</dc:creator>
			<dc:creator>Raja Hashim Ali</dc:creator>
			<dc:creator>Qamar Abbas</dc:creator>
			<dc:creator>Talha Ali Khan</dc:creator>
			<dc:creator>Ambreen Shahnaz</dc:creator>
			<dc:creator>Iftikhar Ahmed</dc:creator>
		<dc:identifier>doi: 10.3390/jimaging12030108</dc:identifier>
	<dc:source>Journal of Imaging</dc:source>
	<dc:date>2026-03-04</dc:date>

	<prism:publicationName>Journal of Imaging</prism:publicationName>
	<prism:publicationDate>2026-03-04</prism:publicationDate>
	<prism:volume>12</prism:volume>
	<prism:number>3</prism:number>
	<prism:section>Article</prism:section>
	<prism:startingPage>108</prism:startingPage>
		<prism:doi>10.3390/jimaging12030108</prism:doi>
	<prism:url>https://www.mdpi.com/2313-433X/12/3/108</prism:url>
	
	<cc:license rdf:resource="CC BY 4.0"/>
</item>
        <item rdf:about="https://www.mdpi.com/2313-433X/12/3/107">

	<title>J. Imaging, Vol. 12, Pages 107: Development of Surveillance Robots Based on Face Recognition Using High-Order Statistical Features and Evidence Theory</title>
	<link>https://www.mdpi.com/2313-433X/12/3/107</link>
	<description>The recent advancements in technologies such as artificial intelligence (AI), computer vision (CV), and Internet of Things (IoT) have significantly extended various fields, particularly in surveillance systems. These innovations enable real-time facial recognition processing, enhancing security and ensuring safety. However, mobile robots are commonly employed in surveillance systems to handle risky tasks that are beyond human capability. In this paper, we present a prototype of a cost-effective mobile surveillance robot built on the Raspberry Pi 4, designed for integration into various industrial environments. This smart robot detects intruders using IoT and face recognition technology. The proposed system is equipped with a passive infrared (PIR) sensor and a camera for capturing live-streaming video and photos, which are sent to the control room through IoT technology. Additionally, the system uses face recognition algorithms to differentiate between company staff and potential intruders. The face recognition method combines high-order statistical features and evidence theory to improve facial recognition accuracy and robustness. High-order statistical features are used to capture complex patterns in facial images, enhancing discrimination between individuals. Evidence theory is employed to integrate multiple information sources, allowing for better decision-making under uncertainty. This approach effectively addresses challenges such as variations in lighting, facial expressions, and occlusions, resulting in a more reliable and accurate face recognition system. When the system detects an unfamiliar individual, it sends out alert notifications and emails to the control room with the captured picture using IoT. A web interface has also been set up to control the robot from a distance through a Wi-Fi connection. The proposed face recognition method is evaluated, and a comparative analysis with existing techniques is conducted. Experimental results with 400 test images of 40 individuals demonstrate the effectiveness of combining various attribute images in improving human face recognition performance. Experimental results indicate that the algorithm can identify human faces with an accuracy of 98.63%.</description>
	<pubDate>2026-02-28</pubDate>

	<content:encoded><![CDATA[
	<p><b>J. Imaging, Vol. 12, Pages 107: Development of Surveillance Robots Based on Face Recognition Using High-Order Statistical Features and Evidence Theory</b></p>
	<p>Journal of Imaging <a href="https://www.mdpi.com/2313-433X/12/3/107">doi: 10.3390/jimaging12030107</a></p>
	<p>Authors:
		Slim Ben Chaabane
		Rafika Harrabi
		Anas Bushnag
		Hassene Seddik
		</p>
	<p>The recent advancements in technologies such as artificial intelligence (AI), computer vision (CV), and Internet of Things (IoT) have significantly extended various fields, particularly in surveillance systems. These innovations enable real-time facial recognition processing, enhancing security and ensuring safety. However, mobile robots are commonly employed in surveillance systems to handle risky tasks that are beyond human capability. In this paper, we present a prototype of a cost-effective mobile surveillance robot built on the Raspberry Pi 4, designed for integration into various industrial environments. This smart robot detects intruders using IoT and face recognition technology. The proposed system is equipped with a passive infrared (PIR) sensor and a camera for capturing live-streaming video and photos, which are sent to the control room through IoT technology. Additionally, the system uses face recognition algorithms to differentiate between company staff and potential intruders. The face recognition method combines high-order statistical features and evidence theory to improve facial recognition accuracy and robustness. High-order statistical features are used to capture complex patterns in facial images, enhancing discrimination between individuals. Evidence theory is employed to integrate multiple information sources, allowing for better decision-making under uncertainty. This approach effectively addresses challenges such as variations in lighting, facial expressions, and occlusions, resulting in a more reliable and accurate face recognition system. When the system detects an unfamiliar individual, it sends out alert notifications and emails to the control room with the captured picture using IoT. A web interface has also been set up to control the robot from a distance through a Wi-Fi connection. The proposed face recognition method is evaluated, and a comparative analysis with existing techniques is conducted. Experimental results with 400 test images of 40 individuals demonstrate the effectiveness of combining various attribute images in improving human face recognition performance. Experimental results indicate that the algorithm can identify human faces with an accuracy of 98.63%.</p>
	]]></content:encoded>

	<dc:title>Development of Surveillance Robots Based on Face Recognition Using High-Order Statistical Features and Evidence Theory</dc:title>
			<dc:creator>Slim Ben Chaabane</dc:creator>
			<dc:creator>Rafika Harrabi</dc:creator>
			<dc:creator>Anas Bushnag</dc:creator>
			<dc:creator>Hassene Seddik</dc:creator>
		<dc:identifier>doi: 10.3390/jimaging12030107</dc:identifier>
	<dc:source>Journal of Imaging</dc:source>
	<dc:date>2026-02-28</dc:date>

	<prism:publicationName>Journal of Imaging</prism:publicationName>
	<prism:publicationDate>2026-02-28</prism:publicationDate>
	<prism:volume>12</prism:volume>
	<prism:number>3</prism:number>
	<prism:section>Article</prism:section>
	<prism:startingPage>107</prism:startingPage>
		<prism:doi>10.3390/jimaging12030107</prism:doi>
	<prism:url>https://www.mdpi.com/2313-433X/12/3/107</prism:url>
	
	<cc:license rdf:resource="CC BY 4.0"/>
</item>
        <item rdf:about="https://www.mdpi.com/2313-433X/12/3/106">

	<title>J. Imaging, Vol. 12, Pages 106: Vision–Language Models for Transmission Line Fault Detection: A New Approach for Grid Reliability and Optimization</title>
	<link>https://www.mdpi.com/2313-433X/12/3/106</link>
	<description>Reliable fault detection along transmission corridors is essential for preventing small defects from developing into long outages and costly emergency operations. This study aims to improve the field reliability of an open vocabulary vision language backbone without retraining the large model in an end-to-end manner. The work focuses on four operational fault classes in multi-region corridor imagery collected during routine inspections and uses a Florence-2 vision language model as the base recognizer. On top of this backbone, three domain-specific components are introduced. A subclass-aware fusion scheme keeps probability mass within the active parent concept so that insulator icing and conductor icing produce stable, action-oriented decisions. A Power-Line Focus Then Crop normalization uses an attention-guided corridor window together with isotropic resizing so that thin conductors and small fittings remain visible in the processed image. A corridor geo prior reduces scores as the distance from the mapped centerline increases and in this way suppresses detections that lie outside the corridor. All methods are evaluated under a shared preprocessing and scoring pipeline in training-free and parameter-efficient tuning modes. Experiments on unseen regions show higher accuracy for skinny and low-contrast faults, fewer false alarms outside the right-of-way, and improved score calibration in the confidence range used for triage, while keeping throughput and memory usage suitable for unmanned aerial vehicles and substation edge devices.</description>
	<pubDate>2026-02-28</pubDate>

	<content:encoded><![CDATA[
	<p><b>J. Imaging, Vol. 12, Pages 106: Vision–Language Models for Transmission Line Fault Detection: A New Approach for Grid Reliability and Optimization</b></p>
	<p>Journal of Imaging <a href="https://www.mdpi.com/2313-433X/12/3/106">doi: 10.3390/jimaging12030106</a></p>
	<p>Authors:
		Runle Yu
		Lihao Mai
		Yang Weng
		Qiushi Cui
		Guochang Xu
		Pengliang Ren
		</p>
	<p>Reliable fault detection along transmission corridors is essential for preventing small defects from developing into long outages and costly emergency operations. This study aims to improve the field reliability of an open vocabulary vision language backbone without retraining the large model in an end-to-end manner. The work focuses on four operational fault classes in multi-region corridor imagery collected during routine inspections and uses a Florence-2 vision language model as the base recognizer. On top of this backbone, three domain-specific components are introduced. A subclass-aware fusion scheme keeps probability mass within the active parent concept so that insulator icing and conductor icing produce stable, action-oriented decisions. A Power-Line Focus Then Crop normalization uses an attention-guided corridor window together with isotropic resizing so that thin conductors and small fittings remain visible in the processed image. A corridor geo prior reduces scores as the distance from the mapped centerline increases and in this way suppresses detections that lie outside the corridor. All methods are evaluated under a shared preprocessing and scoring pipeline in training-free and parameter-efficient tuning modes. Experiments on unseen regions show higher accuracy for skinny and low-contrast faults, fewer false alarms outside the right-of-way, and improved score calibration in the confidence range used for triage, while keeping throughput and memory usage suitable for unmanned aerial vehicles and substation edge devices.</p>
	]]></content:encoded>

	<dc:title>Vision–Language Models for Transmission Line Fault Detection: A New Approach for Grid Reliability and Optimization</dc:title>
			<dc:creator>Runle Yu</dc:creator>
			<dc:creator>Lihao Mai</dc:creator>
			<dc:creator>Yang Weng</dc:creator>
			<dc:creator>Qiushi Cui</dc:creator>
			<dc:creator>Guochang Xu</dc:creator>
			<dc:creator>Pengliang Ren</dc:creator>
		<dc:identifier>doi: 10.3390/jimaging12030106</dc:identifier>
	<dc:source>Journal of Imaging</dc:source>
	<dc:date>2026-02-28</dc:date>

	<prism:publicationName>Journal of Imaging</prism:publicationName>
	<prism:publicationDate>2026-02-28</prism:publicationDate>
	<prism:volume>12</prism:volume>
	<prism:number>3</prism:number>
	<prism:section>Article</prism:section>
	<prism:startingPage>106</prism:startingPage>
		<prism:doi>10.3390/jimaging12030106</prism:doi>
	<prism:url>https://www.mdpi.com/2313-433X/12/3/106</prism:url>
	
	<cc:license rdf:resource="CC BY 4.0"/>
</item>
        <item rdf:about="https://www.mdpi.com/2313-433X/12/3/105">

	<title>J. Imaging, Vol. 12, Pages 105: Stereo Gaussian Splatting with Adaptive Scene Depth Estimation for Semantic Mapping</title>
	<link>https://www.mdpi.com/2313-433X/12/3/105</link>
	<description>Simultaneous Localization and Mapping (SLAM) is a fundamental capability in robotics and augmented reality. However, achieving accurate geometric reconstruction and consistent semantic understanding in complex environments remains challenging. Although recent neural implicit representations have improved reconstruction quality, they often suffer from high computational cost and the forgetting phenomenon during online mapping. In this paper, we propose StereoGS-SLAM, a stereo semantic SLAM framework based on 3D Gaussian Splatting (3DGS) for explicit scene representation. Unlike existing approaches, StereoGS-SLAM operates on passive RGB stereo inputs without requiring active depth sensors. An adaptive depth estimation strategy is introduced to dynamically refine Gaussian scales based on real-time stereo depth estimates, ensuring robust and scale-consistent reconstruction. In addition, we propose a hybrid keyframe selection strategy that integrates motion-aware selection with lightweight random sampling to improve keyframe diversity and maintain stable, real-time optimization. Experimental evaluations demonstrate that StereoGS-SLAM achieves consistent and competitive localization, rendering, and semantic reconstruction performance compared with recent 3DGS-based SLAM systems.</description>
	<pubDate>2026-02-28</pubDate>

	<content:encoded><![CDATA[
	<p><b>J. Imaging, Vol. 12, Pages 105: Stereo Gaussian Splatting with Adaptive Scene Depth Estimation for Semantic Mapping</b></p>
	<p>Journal of Imaging <a href="https://www.mdpi.com/2313-433X/12/3/105">doi: 10.3390/jimaging12030105</a></p>
	<p>Authors:
		Chenhui Fu
		Jiangang Lu
		</p>
	<p>Simultaneous Localization and Mapping (SLAM) is a fundamental capability in robotics and augmented reality. However, achieving accurate geometric reconstruction and consistent semantic understanding in complex environments remains challenging. Although recent neural implicit representations have improved reconstruction quality, they often suffer from high computational cost and the forgetting phenomenon during online mapping. In this paper, we propose StereoGS-SLAM, a stereo semantic SLAM framework based on 3D Gaussian Splatting (3DGS) for explicit scene representation. Unlike existing approaches, StereoGS-SLAM operates on passive RGB stereo inputs without requiring active depth sensors. An adaptive depth estimation strategy is introduced to dynamically refine Gaussian scales based on real-time stereo depth estimates, ensuring robust and scale-consistent reconstruction. In addition, we propose a hybrid keyframe selection strategy that integrates motion-aware selection with lightweight random sampling to improve keyframe diversity and maintain stable, real-time optimization. Experimental evaluations demonstrate that StereoGS-SLAM achieves consistent and competitive localization, rendering, and semantic reconstruction performance compared with recent 3DGS-based SLAM systems.</p>
	]]></content:encoded>

	<dc:title>Stereo Gaussian Splatting with Adaptive Scene Depth Estimation for Semantic Mapping</dc:title>
			<dc:creator>Chenhui Fu</dc:creator>
			<dc:creator>Jiangang Lu</dc:creator>
		<dc:identifier>doi: 10.3390/jimaging12030105</dc:identifier>
	<dc:source>Journal of Imaging</dc:source>
	<dc:date>2026-02-28</dc:date>

	<prism:publicationName>Journal of Imaging</prism:publicationName>
	<prism:publicationDate>2026-02-28</prism:publicationDate>
	<prism:volume>12</prism:volume>
	<prism:number>3</prism:number>
	<prism:section>Article</prism:section>
	<prism:startingPage>105</prism:startingPage>
		<prism:doi>10.3390/jimaging12030105</prism:doi>
	<prism:url>https://www.mdpi.com/2313-433X/12/3/105</prism:url>
	
	<cc:license rdf:resource="CC BY 4.0"/>
</item>
        <item rdf:about="https://www.mdpi.com/2313-433X/12/3/104">

	<title>J. Imaging, Vol. 12, Pages 104: The Retina as a Proxy for Brain Neurodegeneration: A Narrative Review on OCT-Based Retinal Imaging in the Early Detection of Alzheimer’s and Parkinson’s Disease</title>
	<link>https://www.mdpi.com/2313-433X/12/3/104</link>
	<description>Neurodegenerative diseases, including Alzheimer’s disease (AD) and Parkinson’s disease (PD), are major causes of cognitive and motor decline, yet early diagnosis remains challenging due to asymptomatic phases and limited non-invasive biomarkers. This narrative review systematically synthesized studies on retinal imaging in AD and PD. Published studies were identified through searches of PubMed, MEDLINE, Google Scholar, and reference lists, focusing on Optical Coherence Tomography (OCT), OCT Angiography (OCTA), and Spectral-Domain OCT (SD-OCT) assessing retinal structural and vascular changes. Data were extracted on retinal layer thickness, vascular parameters, and diagnostic metrics. Findings indicate that both diseases consistently exhibit thinning of inner retinal layers, particularly the retinal nerve fiber layer (RNFL) and ganglion cell–inner plexiform layer (GCIPL). In AD, studies reported progressive inner retinal thinning across disease stages, sometimes accompanied by outer retinal and retinal pigment epithelium changes. In PD, thinning was observed predominantly in RNFL and GCIPL, correlating with disease duration and motor severity. Microvascular alterations were described in both disorders, with disease-specific spatial patterns reported across studies. Overall, retinal imaging emerges as a non-invasive, high-resolution, and cost-effective tool for early detection, differential assessment, and longitudinal monitoring of neurodegenerative diseases. These findings support the translation of retinal biomarkers into clinical practice for improved disease management.</description>
	<pubDate>2026-02-27</pubDate>

	<content:encoded><![CDATA[
	<p><b>J. Imaging, Vol. 12, Pages 104: The Retina as a Proxy for Brain Neurodegeneration: A Narrative Review on OCT-Based Retinal Imaging in the Early Detection of Alzheimer’s and Parkinson’s Disease</b></p>
	<p>Journal of Imaging <a href="https://www.mdpi.com/2313-433X/12/3/104">doi: 10.3390/jimaging12030104</a></p>
	<p>Authors:
		Ouafa Sijilmassi
		</p>
	<p>Neurodegenerative diseases, including Alzheimer’s disease (AD) and Parkinson’s disease (PD), are major causes of cognitive and motor decline, yet early diagnosis remains challenging due to asymptomatic phases and limited non-invasive biomarkers. This narrative review systematically synthesized studies on retinal imaging in AD and PD. Published studies were identified through searches of PubMed, MEDLINE, Google Scholar, and reference lists, focusing on Optical Coherence Tomography (OCT), OCT Angiography (OCTA), and Spectral-Domain OCT (SD-OCT) assessing retinal structural and vascular changes. Data were extracted on retinal layer thickness, vascular parameters, and diagnostic metrics. Findings indicate that both diseases consistently exhibit thinning of inner retinal layers, particularly the retinal nerve fiber layer (RNFL) and ganglion cell–inner plexiform layer (GCIPL). In AD, studies reported progressive inner retinal thinning across disease stages, sometimes accompanied by outer retinal and retinal pigment epithelium changes. In PD, thinning was observed predominantly in RNFL and GCIPL, correlating with disease duration and motor severity. Microvascular alterations were described in both disorders, with disease-specific spatial patterns reported across studies. Overall, retinal imaging emerges as a non-invasive, high-resolution, and cost-effective tool for early detection, differential assessment, and longitudinal monitoring of neurodegenerative diseases. These findings support the translation of retinal biomarkers into clinical practice for improved disease management.</p>
	]]></content:encoded>

	<dc:title>The Retina as a Proxy for Brain Neurodegeneration: A Narrative Review on OCT-Based Retinal Imaging in the Early Detection of Alzheimer’s and Parkinson’s Disease</dc:title>
			<dc:creator>Ouafa Sijilmassi</dc:creator>
		<dc:identifier>doi: 10.3390/jimaging12030104</dc:identifier>
	<dc:source>Journal of Imaging</dc:source>
	<dc:date>2026-02-27</dc:date>

	<prism:publicationName>Journal of Imaging</prism:publicationName>
	<prism:publicationDate>2026-02-27</prism:publicationDate>
	<prism:volume>12</prism:volume>
	<prism:number>3</prism:number>
	<prism:section>Review</prism:section>
	<prism:startingPage>104</prism:startingPage>
		<prism:doi>10.3390/jimaging12030104</prism:doi>
	<prism:url>https://www.mdpi.com/2313-433X/12/3/104</prism:url>
	
	<cc:license rdf:resource="CC BY 4.0"/>
</item>
        <item rdf:about="https://www.mdpi.com/2313-433X/12/3/103">

	<title>J. Imaging, Vol. 12, Pages 103: Restoration of Non-Uniform Motion-Blurred Star Images Based on Dynamic Strip Attention</title>
	<link>https://www.mdpi.com/2313-433X/12/3/103</link>
	<description>When capturing star images in long-exposure mode, due to the relative motion between stars and space objects and the observation camera, strip tailings with different directions and lengths will be formed, resulting in a serious decline in image quality and inaccurate centroid positioning. Traditional methods for restoring star images are prone to ringing effects and cannot restore the non-uniformly blurred star images. Aiming at this problem, this paper proposes a star image restoration network based on a dynamic strip attention mechanism. Firstly, a Multi-scale Dynamic Strip Pooling Module is designed to adaptively extract blurred features of different lengths and directions by dynamically adjusting the strip convolution. After that, a Multi-scale Feature Fusion Module is designed to fuse multi-level features to reduce the loss of image details of stars and space objects in the image. Experimental results demonstrate that the proposed method achieves a PSNR of 84.08 and an SSIM of 0.9928 on the 16-bit simulated dataset, outperforming both traditional methods and other deep learning-based approaches. Specifically, the recognition accuracy of star points is increased by 174% in comparison with unprocessed images. Furthermore, this paper validates the network using the real-world dataset spotGEO, and the results indicate that the average number of successfully recognized star points is increased by 57% compared to direct processing of the original images.</description>
	<pubDate>2026-02-27</pubDate>

	<content:encoded><![CDATA[
	<p><b>J. Imaging, Vol. 12, Pages 103: Restoration of Non-Uniform Motion-Blurred Star Images Based on Dynamic Strip Attention</b></p>
	<p>Journal of Imaging <a href="https://www.mdpi.com/2313-433X/12/3/103">doi: 10.3390/jimaging12030103</a></p>
	<p>Authors:
		Jixin Han
		Zhaodong Niu
		Jun He
		</p>
	<p>When capturing star images in long-exposure mode, due to the relative motion between stars and space objects and the observation camera, strip tailings with different directions and lengths will be formed, resulting in a serious decline in image quality and inaccurate centroid positioning. Traditional methods for restoring star images are prone to ringing effects and cannot restore the non-uniformly blurred star images. Aiming at this problem, this paper proposes a star image restoration network based on a dynamic strip attention mechanism. Firstly, a Multi-scale Dynamic Strip Pooling Module is designed to adaptively extract blurred features of different lengths and directions by dynamically adjusting the strip convolution. After that, a Multi-scale Feature Fusion Module is designed to fuse multi-level features to reduce the loss of image details of stars and space objects in the image. Experimental results demonstrate that the proposed method achieves a PSNR of 84.08 and an SSIM of 0.9928 on the 16-bit simulated dataset, outperforming both traditional methods and other deep learning-based approaches. Specifically, the recognition accuracy of star points is increased by 174% in comparison with unprocessed images. Furthermore, this paper validates the network using the real-world dataset spotGEO, and the results indicate that the average number of successfully recognized star points is increased by 57% compared to direct processing of the original images.</p>
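	<p>The figures above (PSNR on 16-bit data and SSIM) are standard image-quality measures. The sketch below shows how they are typically computed for 16-bit frames with scikit-image; the synthetic images, array names, and noise model are assumptions for illustration and are not the authors' evaluation code.</p>
	<pre><code>
# Illustrative sketch only: computing PSNR/SSIM for 16-bit images
# with scikit-image; array names and synthetic data are assumptions,
# not taken from the paper.
import numpy as np
from skimage.metrics import peak_signal_noise_ratio, structural_similarity

def evaluate_restoration(restored, reference):
    """Return (PSNR, SSIM) for two uint16 images of equal shape."""
    restored = restored.astype(np.uint16)
    reference = reference.astype(np.uint16)
    # data_range must reflect the 16-bit dynamic range, otherwise the
    # reported PSNR/SSIM values are not comparable across bit depths.
    psnr = peak_signal_noise_ratio(reference, restored, data_range=65535)
    ssim = structural_similarity(reference, restored, data_range=65535)
    return psnr, ssim

if __name__ == "__main__":
    rng = np.random.default_rng(0)
    truth = rng.integers(0, 65536, size=(256, 256), dtype=np.uint16)
    noisy = np.clip(truth.astype(np.int32) + rng.normal(0, 50, truth.shape),
                    0, 65535).astype(np.uint16)
    print(evaluate_restoration(noisy, truth))
</code></pre>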
	]]></content:encoded>

	<dc:title>Restoration of Non-Uniform Motion-Blurred Star Images Based on Dynamic Strip Attention</dc:title>
			<dc:creator>Jixin Han</dc:creator>
			<dc:creator>Zhaodong Niu</dc:creator>
			<dc:creator>Jun He</dc:creator>
		<dc:identifier>doi: 10.3390/jimaging12030103</dc:identifier>
	<dc:source>Journal of Imaging</dc:source>
	<dc:date>2026-02-27</dc:date>

	<prism:publicationName>Journal of Imaging</prism:publicationName>
	<prism:publicationDate>2026-02-27</prism:publicationDate>
	<prism:volume>12</prism:volume>
	<prism:number>3</prism:number>
	<prism:section>Article</prism:section>
	<prism:startingPage>103</prism:startingPage>
		<prism:doi>10.3390/jimaging12030103</prism:doi>
	<prism:url>https://www.mdpi.com/2313-433X/12/3/103</prism:url>
	
	<cc:license rdf:resource="CC BY 4.0"/>
</item>
        <item rdf:about="https://www.mdpi.com/2313-433X/12/3/102">

	<title>J. Imaging, Vol. 12, Pages 102: Fine-Grained Age-Class Identification of Moso Bamboo Using an Improved Lightweight YOLO11 Model</title>
	<link>https://www.mdpi.com/2313-433X/12/3/102</link>
	<description>Accurate identification of moso bamboo (Phyllostachys edulis) age classes is essential for effective forestry resource management, yet existing methods often struggle to achieve a satisfactory balance between accuracy and computational efficiency under complex field conditions. To address this challenge, this study proposes a lightweight object detection model, termed YOLO11-GCR, for fine-grained moso bamboo age-class classification based on close-range imagery. The proposed approach builds upon the YOLO11 framework and incorporates Ghost convolution, the Convolutional Block Attention Module (CBAM), and a Receptive Field Block (RFB) to reduce model complexity, enhance discriminative feature representation, and improve sensitivity to subtle texture variations among age classes. A dataset consisting of 9538 annotated bamboo culm images covering four age classes (I-du to IV-du) was constructed and divided into training, validation, and independent test sets with strict spatiotemporal separation. Experimental results indicate that YOLO11-GCR achieves robust detection performance with a lightweight architecture of 2.62 &amp;amp;times; 106 parameters and 6.2 GFLOPs, yielding an mAP@0.5 of 0.913 and an mAP@0.5&amp;amp;ndash;0.95 of 0.895 on the independent test set. Notably, the model demonstrates improved classification stability for visually similar age classes, such as II-du and III-du. Overall, this study presents an efficient and practical imaging-based solution for automated moso bamboo age-class recognition in complex natural environments.</description>
	<pubDate>2026-02-27</pubDate>

	<content:encoded><![CDATA[
	<p><b>J. Imaging, Vol. 12, Pages 102: Fine-Grained Age-Class Identification of Moso Bamboo Using an Improved Lightweight YOLO11 Model</b></p>
	<p>Journal of Imaging <a href="https://www.mdpi.com/2313-433X/12/3/102">doi: 10.3390/jimaging12030102</a></p>
	<p>Authors:
		Yingbin Zhang
		Xinhuang Zhang
		Zhichao Cai
		Xi He
		Shuwei Chen
		Zhengxuan Lai
		Kunyong Yu
		Riwen Lai
		</p>
	<p>Accurate identification of moso bamboo (Phyllostachys edulis) age classes is essential for effective forestry resource management, yet existing methods often struggle to achieve a satisfactory balance between accuracy and computational efficiency under complex field conditions. To address this challenge, this study proposes a lightweight object detection model, termed YOLO11-GCR, for fine-grained moso bamboo age-class classification based on close-range imagery. The proposed approach builds upon the YOLO11 framework and incorporates Ghost convolution, the Convolutional Block Attention Module (CBAM), and a Receptive Field Block (RFB) to reduce model complexity, enhance discriminative feature representation, and improve sensitivity to subtle texture variations among age classes. A dataset consisting of 9538 annotated bamboo culm images covering four age classes (I-du to IV-du) was constructed and divided into training, validation, and independent test sets with strict spatiotemporal separation. Experimental results indicate that YOLO11-GCR achieves robust detection performance with a lightweight architecture of 2.62 &amp;amp;times; 106 parameters and 6.2 GFLOPs, yielding an mAP@0.5 of 0.913 and an mAP@0.5&amp;amp;ndash;0.95 of 0.895 on the independent test set. Notably, the model demonstrates improved classification stability for visually similar age classes, such as II-du and III-du. Overall, this study presents an efficient and practical imaging-based solution for automated moso bamboo age-class recognition in complex natural environments.</p>
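	<p>Ghost convolution, one of the components credited above with reducing model complexity, replaces part of an ordinary convolution with cheap depthwise operations. The sketch below illustrates that general idea in PyTorch; the kernel sizes, channel counts, and activation are assumptions and do not reproduce the YOLO11-GCR implementation.</p>
	<pre><code>
# Illustrative sketch of a Ghost-style convolution block: a reduced
# "primary" convolution plus cheap depthwise convolutions, concatenated
# to reach the requested channel count. Hyperparameters below are
# assumptions, not the paper's YOLO11-GCR configuration.
import torch
import torch.nn as nn

class GhostConv(nn.Module):
    def __init__(self, in_ch, out_ch, kernel=3, ratio=2):
        super().__init__()
        primary_ch = out_ch // ratio          # channels from the costly conv
        cheap_ch = out_ch - primary_ch        # channels from cheap depthwise ops
        self.primary = nn.Sequential(
            nn.Conv2d(in_ch, primary_ch, kernel, padding=kernel // 2, bias=False),
            nn.BatchNorm2d(primary_ch),
            nn.SiLU(),
        )
        self.cheap = nn.Sequential(
            nn.Conv2d(primary_ch, cheap_ch, kernel, padding=kernel // 2,
                      groups=primary_ch, bias=False),   # depthwise
            nn.BatchNorm2d(cheap_ch),
            nn.SiLU(),
        )

    def forward(self, x):
        y = self.primary(x)
        return torch.cat([y, self.cheap(y)], dim=1)

x = torch.randn(1, 64, 80, 80)
print(GhostConv(64, 128)(x).shape)   # torch.Size([1, 128, 80, 80])
</code></pre>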
	]]></content:encoded>

	<dc:title>Fine-Grained Age-Class Identification of Moso Bamboo Using an Improved Lightweight YOLO11 Model</dc:title>
			<dc:creator>Yingbin Zhang</dc:creator>
			<dc:creator>Xinhuang Zhang</dc:creator>
			<dc:creator>Zhichao Cai</dc:creator>
			<dc:creator>Xi He</dc:creator>
			<dc:creator>Shuwei Chen</dc:creator>
			<dc:creator>Zhengxuan Lai</dc:creator>
			<dc:creator>Kunyong Yu</dc:creator>
			<dc:creator>Riwen Lai</dc:creator>
		<dc:identifier>doi: 10.3390/jimaging12030102</dc:identifier>
	<dc:source>Journal of Imaging</dc:source>
	<dc:date>2026-02-27</dc:date>

	<prism:publicationName>Journal of Imaging</prism:publicationName>
	<prism:publicationDate>2026-02-27</prism:publicationDate>
	<prism:volume>12</prism:volume>
	<prism:number>3</prism:number>
	<prism:section>Article</prism:section>
	<prism:startingPage>102</prism:startingPage>
		<prism:doi>10.3390/jimaging12030102</prism:doi>
	<prism:url>https://www.mdpi.com/2313-433X/12/3/102</prism:url>
	
	<cc:license rdf:resource="CC BY 4.0"/>
</item>
        <item rdf:about="https://www.mdpi.com/2313-433X/12/3/101">

	<title>J. Imaging, Vol. 12, Pages 101: Lensless Quantitative Phase Imaging with Bayer-Filtered Color Sensors Under Sequential RGB-LED Illumination</title>
	<link>https://www.mdpi.com/2313-433X/12/3/101</link>
	<description>Lensless on-chip microscopy enables high-throughput, wide-FOV imaging; however, the Bayer color filter array (CFA) in standard color sensors spatially multiplexes spectral channels, introducing sub-sampling and spectral crosstalk that degrade phase retrieval. We propose a Wirtinger Poly-Gradient Solver (WPGS) for quantitative phase reconstruction with Bayer-filtered color sensors under sequential Red&amp;amp;ndash;Green&amp;amp;ndash;Blue Light-Emitting Diode (RGB-LED) illumination. The method combines Transport of Intensity Equation (TIE)-based initialization with polychromatic Wirtinger optimization to suppress CFA-induced artifacts and enable pixel super-resolution (PSR). Experiments resolve a 2.76&amp;amp;nbsp;&amp;amp;mu;m linewidth using a 1.85&amp;amp;nbsp;&amp;amp;mu;m pixel-pitch sensor, exceeding the nominal Nyquist limit imposed by pixel sampling. We further demonstrate label-free imaging of HeLa cells and unstained tissue sections, supporting high-throughput digital pathology and offering potential for longitudinal biological observation.</description>
	<pubDate>2026-02-26</pubDate>

	<content:encoded><![CDATA[
	<p><b>J. Imaging, Vol. 12, Pages 101: Lensless Quantitative Phase Imaging with Bayer-Filtered Color Sensors Under Sequential RGB-LED Illumination</b></p>
	<p>Journal of Imaging <a href="https://www.mdpi.com/2313-433X/12/3/101">doi: 10.3390/jimaging12030101</a></p>
	<p>Authors:
		Jiajia Wu
		Yining Li
		Yuheng Luo
		Leiting Pan
		Pengming Song
		Qiang Xu
		</p>
	<p>Lensless on-chip microscopy enables high-throughput, wide-FOV imaging; however, the Bayer color filter array (CFA) in standard color sensors spatially multiplexes spectral channels, introducing sub-sampling and spectral crosstalk that degrade phase retrieval. We propose a Wirtinger Poly-Gradient Solver (WPGS) for quantitative phase reconstruction with Bayer-filtered color sensors under sequential Red&amp;amp;ndash;Green&amp;amp;ndash;Blue Light-Emitting Diode (RGB-LED) illumination. The method combines Transport of Intensity Equation (TIE)-based initialization with polychromatic Wirtinger optimization to suppress CFA-induced artifacts and enable pixel super-resolution (PSR). Experiments resolve a 2.76&amp;amp;nbsp;&amp;amp;mu;m linewidth using a 1.85&amp;amp;nbsp;&amp;amp;mu;m pixel-pitch sensor, exceeding the nominal Nyquist limit imposed by pixel sampling. We further demonstrate label-free imaging of HeLa cells and unstained tissue sections, supporting high-throughput digital pathology and offering potential for longitudinal biological observation.</p>
	]]></content:encoded>

	<dc:title>Lensless Quantitative Phase Imaging with Bayer-Filtered Color Sensors Under Sequential RGB-LED Illumination</dc:title>
			<dc:creator>Jiajia Wu</dc:creator>
			<dc:creator>Yining Li</dc:creator>
			<dc:creator>Yuheng Luo</dc:creator>
			<dc:creator>Leiting Pan</dc:creator>
			<dc:creator>Pengming Song</dc:creator>
			<dc:creator>Qiang Xu</dc:creator>
		<dc:identifier>doi: 10.3390/jimaging12030101</dc:identifier>
	<dc:source>Journal of Imaging</dc:source>
	<dc:date>2026-02-26</dc:date>

	<prism:publicationName>Journal of Imaging</prism:publicationName>
	<prism:publicationDate>2026-02-26</prism:publicationDate>
	<prism:volume>12</prism:volume>
	<prism:number>3</prism:number>
	<prism:section>Communication</prism:section>
	<prism:startingPage>101</prism:startingPage>
		<prism:doi>10.3390/jimaging12030101</prism:doi>
	<prism:url>https://www.mdpi.com/2313-433X/12/3/101</prism:url>
	
	<cc:license rdf:resource="CC BY 4.0"/>
</item>
        <item rdf:about="https://www.mdpi.com/2313-433X/12/3/100">

	<title>J. Imaging, Vol. 12, Pages 100: The Augmented Cytopathologist: A Conceptual Exploratory Narrative Review on Immersive and Vision&amp;ndash;Language Models Tools in Digital Pathology</title>
	<link>https://www.mdpi.com/2313-433X/12/3/100</link>
	<description>Emerging digital technologies, including immersive environments (VR/AR/XR) and Vision&amp;amp;ndash;Language Models (VLMs), have the potential to reshape digital pathology and medical imaging. While immersive tools can enhance spatial visualization and procedural training, VLM-based copilots offer cognitive and workflow support. Their combined impact on cytopathology remains largely conceptual and preclinical. This Conceptual Exploratory Narrative Review (CENR) examines how immersive technologies and VLM-based copilots may jointly influence cytopathologists&amp;amp;rsquo; professional workflow, training, and diagnostic processes, introducing the notion of the &amp;amp;ldquo;augmented cytopathologist.&amp;amp;rdquo; A structured exploratory approach integrated peer-reviewed literature, position papers, preprints, gray literature (technical reports, white papers, conference abstracts, blogs), and cross-disciplinary perspectives. Database searches (PubMed, Web of Science, Scopus) confirmed a limited number of studies addressing immersive or AI-assisted cytopathology imaging. Thematic analysis focused on four conceptual dimensions: (1) technological capabilities and maturity; (2) workflow and educational applications; (3) professional implications and cytopathologist role; and (4) responsible use of LLMs and VLMs as supportive tools. This approach emphasizes interpretation of emerging trends over aggregation of empirical data, enabling conceptual synthesis of early-stage implementations and perspectives in the field. Immersive technologies facilitate three-dimensional visualization, procedural skill development, and collaborative engagement, whereas VLMs support report generation, literature retrieval, and decision guidance. Together, they offer a synergistic model for perceptual and cognitive augmentation. Key challenges include technical maturity, interoperability, workflow integration, regulatory compliance, and ethical oversight. Figures illustrate representative examples of (1) remote collaborative immersive evaluation and (2) integration of immersive visualization with VLM-based copilots, highlighting potential applications in training and workflow support. The CENR underscores the potential of combining immersive tools and AI copilots to support cytopathology, particularly for education, workflow efficiency, and cognitive augmentation. Adoption should be incremental and carefully governed, emphasizing augmentative rather than transformative use. Future research should focus on clinical validation, scalable integration, and regulatory and ethical frameworks to realize the concept of the augmented cytopathologist in practice.</description>
	<pubDate>2026-02-26</pubDate>

	<content:encoded><![CDATA[
	<p><b>J. Imaging, Vol. 12, Pages 100: The Augmented Cytopathologist: A Conceptual Exploratory Narrative Review on Immersive and Vision&amp;ndash;Language Models Tools in Digital Pathology</b></p>
	<p>Journal of Imaging <a href="https://www.mdpi.com/2313-433X/12/3/100">doi: 10.3390/jimaging12030100</a></p>
	<p>Authors:
		Enrico Giarnieri
		Andrea Lastrucci
		Alberto Ricci
		Pierdonato Bruno
		Daniele Giansanti
		</p>
	<p>Emerging digital technologies, including immersive environments (VR/AR/XR) and Vision&amp;amp;ndash;Language Models (VLMs), have the potential to reshape digital pathology and medical imaging. While immersive tools can enhance spatial visualization and procedural training, VLM-based copilots offer cognitive and workflow support. Their combined impact on cytopathology remains largely conceptual and preclinical. This Conceptual Exploratory Narrative Review (CENR) examines how immersive technologies and VLM-based copilots may jointly influence cytopathologists&amp;amp;rsquo; professional workflow, training, and diagnostic processes, introducing the notion of the &amp;amp;ldquo;augmented cytopathologist.&amp;amp;rdquo; A structured exploratory approach integrated peer-reviewed literature, position papers, preprints, gray literature (technical reports, white papers, conference abstracts, blogs), and cross-disciplinary perspectives. Database searches (PubMed, Web of Science, Scopus) confirmed a limited number of studies addressing immersive or AI-assisted cytopathology imaging. Thematic analysis focused on four conceptual dimensions: (1) technological capabilities and maturity; (2) workflow and educational applications; (3) professional implications and cytopathologist role; and (4) responsible use of LLMs and VLMs as supportive tools. This approach emphasizes interpretation of emerging trends over aggregation of empirical data, enabling conceptual synthesis of early-stage implementations and perspectives in the field. Immersive technologies facilitate three-dimensional visualization, procedural skill development, and collaborative engagement, whereas VLMs support report generation, literature retrieval, and decision guidance. Together, they offer a synergistic model for perceptual and cognitive augmentation. Key challenges include technical maturity, interoperability, workflow integration, regulatory compliance, and ethical oversight. Figures illustrate representative examples of (1) remote collaborative immersive evaluation and (2) integration of immersive visualization with VLM-based copilots, highlighting potential applications in training and workflow support. The CENR underscores the potential of combining immersive tools and AI copilots to support cytopathology, particularly for education, workflow efficiency, and cognitive augmentation. Adoption should be incremental and carefully governed, emphasizing augmentative rather than transformative use. Future research should focus on clinical validation, scalable integration, and regulatory and ethical frameworks to realize the concept of the augmented cytopathologist in practice.</p>
	]]></content:encoded>

	<dc:title>The Augmented Cytopathologist: A Conceptual Exploratory Narrative Review on Immersive and Vision&amp;ndash;Language Models Tools in Digital Pathology</dc:title>
			<dc:creator>Enrico Giarnieri</dc:creator>
			<dc:creator>Andrea Lastrucci</dc:creator>
			<dc:creator>Alberto Ricci</dc:creator>
			<dc:creator>Pierdonato Bruno</dc:creator>
			<dc:creator>Daniele Giansanti</dc:creator>
		<dc:identifier>doi: 10.3390/jimaging12030100</dc:identifier>
	<dc:source>Journal of Imaging</dc:source>
	<dc:date>2026-02-26</dc:date>

	<prism:publicationName>Journal of Imaging</prism:publicationName>
	<prism:publicationDate>2026-02-26</prism:publicationDate>
	<prism:volume>12</prism:volume>
	<prism:number>3</prism:number>
	<prism:section>Review</prism:section>
	<prism:startingPage>100</prism:startingPage>
		<prism:doi>10.3390/jimaging12030100</prism:doi>
	<prism:url>https://www.mdpi.com/2313-433X/12/3/100</prism:url>
	
	<cc:license rdf:resource="CC BY 4.0"/>
</item>
        <item rdf:about="https://www.mdpi.com/2313-433X/12/3/99">

	<title>J. Imaging, Vol. 12, Pages 99: Design and Development of an Automated Pipeline for Medical Hyperspectral Image Acquisition, Processing, and Fusion</title>
	<link>https://www.mdpi.com/2313-433X/12/3/99</link>
	<description>Automated and comprehensive processing of hyperspectral image data is increasingly important in academic research and medical technology. This study presents an automated processing pipeline that integrates hyperspectral image acquisition, analysis, multimodal fusion, and centralized data management to improve the interpretability of spectral information for biological tissue analysis. The pipeline supports modular hyperspectral data processing, fusion of complementary wavelength ranges, and scalable data storage, and was implemented in Python 3.13.3. The pipeline was evaluated using hyperspectral imaging data acquired from a coronal mouse brain section. Clustering-based analysis and spectral correlation metrics were applied to assess the impact of multimodal data fusion on spectral representation. Clustering of individual modalities yielded silhouette coefficients of 0.5879 for near-infrared data, 0.6020 for mid-infrared data, and 0.6715 for RGB data. Multimodal fusion reduced the silhouette coefficient to 0.5420 and enabled the identification of anatomical structures that were not distinguishable in any single modality. High spectral correlation coefficients exceeding 0.98 confirmed that spectral fidelity was preserved during fusion. These results demonstrate that automated multimodal hyperspectral data fusion can enhance the interpretability of biological tissue despite reduced clustering compactness. The proposed pipeline provides a structured framework for preclinical hyperspectral imaging workflows and supports exploratory biological analysis in medical imaging contexts.</description>
	<pubDate>2026-02-25</pubDate>

	<content:encoded><![CDATA[
	<p><b>J. Imaging, Vol. 12, Pages 99: Design and Development of an Automated Pipeline for Medical Hyperspectral Image Acquisition, Processing, and Fusion</b></p>
	<p>Journal of Imaging <a href="https://www.mdpi.com/2313-433X/12/3/99">doi: 10.3390/jimaging12030099</a></p>
	<p>Authors:
		Felix Wühler
		Tim Markus Häußermann
		Alessa Rache
		Björn van Marwick
		Carmen Wängler
		Julian Reichwald
		Matthias Rädle
		</p>
	<p>Automated and comprehensive processing of hyperspectral image data is increasingly important in academic research and medical technology. This study presents an automated processing pipeline that integrates hyperspectral image acquisition, analysis, multimodal fusion, and centralized data management to improve the interpretability of spectral information for biological tissue analysis. The pipeline supports modular hyperspectral data processing, fusion of complementary wavelength ranges, and scalable data storage, and was implemented in Python 3.13.3. The pipeline was evaluated using hyperspectral imaging data acquired from a coronal mouse brain section. Clustering-based analysis and spectral correlation metrics were applied to assess the impact of multimodal data fusion on spectral representation. Clustering of individual modalities yielded silhouette coefficients of 0.5879 for near-infrared data, 0.6020 for mid-infrared data, and 0.6715 for RGB data. Multimodal fusion reduced the silhouette coefficient to 0.5420 and enabled the identification of anatomical structures that were not distinguishable in any single modality. High spectral correlation coefficients exceeding 0.98 confirmed that spectral fidelity was preserved during fusion. These results demonstrate that automated multimodal hyperspectral data fusion can enhance the interpretability of biological tissue despite reduced clustering compactness. The proposed pipeline provides a structured framework for preclinical hyperspectral imaging workflows and supports exploratory biological analysis in medical imaging contexts.</p>
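	<p>The silhouette coefficients and spectral correlations cited above are standard measures. The sketch below shows one way to compute them with scikit-learn and NumPy on per-pixel spectra; the synthetic data, cluster count, and variable names are assumptions, not the pipeline's code.</p>
	<pre><code>
# Illustrative sketch: silhouette coefficient of a k-means clustering and
# a spectral correlation check between an original and a fused spectrum.
# Data shapes, k, and variable names are assumptions, not the pipeline's code.
import numpy as np
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score

rng = np.random.default_rng(0)
spectra = rng.random((5000, 120))        # 5000 pixels x 120 spectral bands

labels = KMeans(n_clusters=4, n_init=10, random_state=0).fit_predict(spectra)
print("silhouette:", silhouette_score(spectra, labels))

# Spectral fidelity after fusion: Pearson correlation between the original
# band values of one pixel and the corresponding values in the fused cube.
original = spectra[0]
fused = original + rng.normal(0.0, 0.01, original.shape)
print("correlation:", np.corrcoef(original, fused)[0, 1])
</code></pre>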
	]]></content:encoded>

	<dc:title>Design and Development of an Automated Pipeline for Medical Hyperspectral Image Acquisition, Processing, and Fusion</dc:title>
			<dc:creator>Felix Wühler</dc:creator>
			<dc:creator>Tim Markus Häußermann</dc:creator>
			<dc:creator>Alessa Rache</dc:creator>
			<dc:creator>Björn van Marwick</dc:creator>
			<dc:creator>Carmen Wängler</dc:creator>
			<dc:creator>Julian Reichwald</dc:creator>
			<dc:creator>Matthias Rädle</dc:creator>
		<dc:identifier>doi: 10.3390/jimaging12030099</dc:identifier>
	<dc:source>Journal of Imaging</dc:source>
	<dc:date>2026-02-25</dc:date>

	<prism:publicationName>Journal of Imaging</prism:publicationName>
	<prism:publicationDate>2026-02-25</prism:publicationDate>
	<prism:volume>12</prism:volume>
	<prism:number>3</prism:number>
	<prism:section>Article</prism:section>
	<prism:startingPage>99</prism:startingPage>
		<prism:doi>10.3390/jimaging12030099</prism:doi>
	<prism:url>https://www.mdpi.com/2313-433X/12/3/99</prism:url>
	
	<cc:license rdf:resource="CC BY 4.0"/>
</item>
        <item rdf:about="https://www.mdpi.com/2313-433X/12/3/98">

	<title>J. Imaging, Vol. 12, Pages 98: Hybrid Vision Transformer&amp;ndash;CNN Framework for Alzheimer&amp;rsquo;s Disease Cell Type Classification: A Comparative Study with Vision&amp;ndash;Language Models</title>
	<link>https://www.mdpi.com/2313-433X/12/3/98</link>
	<description>Accurate identification of Alzheimer&amp;amp;rsquo;s disease (AD)-related cellular characteristics from microscopy images is essential for understanding neurodegenerative mechanisms at the cellular level. While most computational approaches focus on macroscopic neuroimaging modalities, cell type classification from microscopy remains relatively underexplored. In this study, we propose a hybrid vision transformer&amp;amp;ndash;convolutional neural network (ViT&amp;amp;ndash;CNN) framework that integrates DeiT-Small and EfficientNet-B7 to classify three AD-related cell types&amp;amp;mdash;astrocytes, cortical neurons, and SH-SY5Y neuroblastoma cells&amp;amp;mdash;from phase-contrast microscopy images. We perform a comparative evaluation against conventional CNN architectures (DenseNet, ResNet, InceptionNet, and MobileNet) and prompt-based multimodal vision&amp;amp;ndash;language models (GPT-5, GPT-4o, and Gemini 2.5-Flash) using zero-shot, few-shot, and chain-of-thought prompting. Experiments conducted with stratified fivefold cross-validation show that the proposed hybrid model achieves a test accuracy of 61.03% and a macro F1 score of 61.85, outperforming standalone CNN baselines and prompt-only LLM approaches under data-limited conditions. These results suggest that combining convolutional inductive biases with transformer-based global context modeling can improve generalization for cellular microscopy classification. While constrained by dataset size and scope, this work serves as a proof of concept and highlights promising directions for future research in domain-specific pretraining, multimodal data integration, and explainable AI for AD-related cellular analysis.</description>
	<pubDate>2026-02-25</pubDate>

	<content:encoded><![CDATA[
	<p><b>J. Imaging, Vol. 12, Pages 98: Hybrid Vision Transformer&amp;ndash;CNN Framework for Alzheimer&amp;rsquo;s Disease Cell Type Classification: A Comparative Study with Vision&amp;ndash;Language Models</b></p>
	<p>Journal of Imaging <a href="https://www.mdpi.com/2313-433X/12/3/98">doi: 10.3390/jimaging12030098</a></p>
	<p>Authors:
		Md Easin Hasan
		Md Tahmid Hasan Fuad
		Omar Sharif
		Amy Wagler
		</p>
	<p>Accurate identification of Alzheimer&amp;amp;rsquo;s disease (AD)-related cellular characteristics from microscopy images is essential for understanding neurodegenerative mechanisms at the cellular level. While most computational approaches focus on macroscopic neuroimaging modalities, cell type classification from microscopy remains relatively underexplored. In this study, we propose a hybrid vision transformer&amp;amp;ndash;convolutional neural network (ViT&amp;amp;ndash;CNN) framework that integrates DeiT-Small and EfficientNet-B7 to classify three AD-related cell types&amp;amp;mdash;astrocytes, cortical neurons, and SH-SY5Y neuroblastoma cells&amp;amp;mdash;from phase-contrast microscopy images. We perform a comparative evaluation against conventional CNN architectures (DenseNet, ResNet, InceptionNet, and MobileNet) and prompt-based multimodal vision&amp;amp;ndash;language models (GPT-5, GPT-4o, and Gemini 2.5-Flash) using zero-shot, few-shot, and chain-of-thought prompting. Experiments conducted with stratified fivefold cross-validation show that the proposed hybrid model achieves a test accuracy of 61.03% and a macro F1 score of 61.85, outperforming standalone CNN baselines and prompt-only LLM approaches under data-limited conditions. These results suggest that combining convolutional inductive biases with transformer-based global context modeling can improve generalization for cellular microscopy classification. While constrained by dataset size and scope, this work serves as a proof of concept and highlights promising directions for future research in domain-specific pretraining, multimodal data integration, and explainable AI for AD-related cellular analysis.</p>
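	<p>The evaluation protocol named above (stratified fivefold cross-validation with accuracy and macro F1) can be sketched as follows with scikit-learn; the logistic-regression classifier and synthetic features are stand-ins for illustration, since the paper's model is the DeiT/EfficientNet hybrid.</p>
	<pre><code>
# Illustrative sketch of stratified five-fold evaluation with accuracy and
# macro F1, as reported in the abstract. The logistic-regression classifier
# and synthetic features are placeholders, not the paper's hybrid model.
import numpy as np
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, f1_score
from sklearn.model_selection import StratifiedKFold

X, y = make_classification(n_samples=600, n_features=64, n_classes=3,
                           n_informative=10, random_state=0)

accs, f1s = [], []
for train_idx, test_idx in StratifiedKFold(n_splits=5, shuffle=True,
                                           random_state=0).split(X, y):
    clf = LogisticRegression(max_iter=1000).fit(X[train_idx], y[train_idx])
    pred = clf.predict(X[test_idx])
    accs.append(accuracy_score(y[test_idx], pred))
    f1s.append(f1_score(y[test_idx], pred, average="macro"))

print("accuracy:", np.mean(accs), "macro F1:", np.mean(f1s))
</code></pre>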
	]]></content:encoded>

	<dc:title>Hybrid Vision Transformer&amp;ndash;CNN Framework for Alzheimer&amp;rsquo;s Disease Cell Type Classification: A Comparative Study with Vision&amp;ndash;Language Models</dc:title>
			<dc:creator>Md Easin Hasan</dc:creator>
			<dc:creator>Md Tahmid Hasan Fuad</dc:creator>
			<dc:creator>Omar Sharif</dc:creator>
			<dc:creator>Amy Wagler</dc:creator>
		<dc:identifier>doi: 10.3390/jimaging12030098</dc:identifier>
	<dc:source>Journal of Imaging</dc:source>
	<dc:date>2026-02-25</dc:date>

	<prism:publicationName>Journal of Imaging</prism:publicationName>
	<prism:publicationDate>2026-02-25</prism:publicationDate>
	<prism:volume>12</prism:volume>
	<prism:number>3</prism:number>
	<prism:section>Article</prism:section>
	<prism:startingPage>98</prism:startingPage>
		<prism:doi>10.3390/jimaging12030098</prism:doi>
	<prism:url>https://www.mdpi.com/2313-433X/12/3/98</prism:url>
	
	<cc:license rdf:resource="CC BY 4.0"/>
</item>
        <item rdf:about="https://www.mdpi.com/2313-433X/12/3/97">

	<title>J. Imaging, Vol. 12, Pages 97: A Deep Learning-Based Correction for Scanning Radius Errors in Circular-Scan Photoacoustic Tomography</title>
	<link>https://www.mdpi.com/2313-433X/12/3/97</link>
	<description>Circular-Scan photoacoustic tomography (PAT) can provide high-resolution images of optical absorption, but its analytical reconstructions, such as delay-and-sum (DAS), are highly sensitive to scanning radius (SR) inaccuracies, which cause severe geometric distortions and artifacts. In this work, we propose a deep learning framework, termed smooth deconvolution ResNet (SD-ResNet), to correct DAS reconstruction degradation induced by SR errors. SD-ResNet uses an ImageNet-pretrained ResNet-50 encoder and a lightweight deconvolutional decoder with additional smoothing convolutions to suppress checkerboard artifacts and restore fine structural details. A paired training dataset is generated using k-Wave simulations driven by human thoracic computed tomography (CT) slices: for each phantom, radiofrequency data are simulated once, and DAS images reconstructed with the true SR serve as ground truth, whereas images reconstructed with biased SR values serve as inputs. This design provides structurally diverse training samples and enhances generalization. In silico experiments show that SD-ResNet effectively recovers image quality across a range of SR deviations. Phantom experiments with polyethylene microspheres further confirm that the proposed method can substantially reduce artifacts and recover correct source shapes under practical SR mismatches, offering a robust tool for SR-error-resilient PAT imaging.</description>
	<pubDate>2026-02-25</pubDate>

	<content:encoded><![CDATA[
	<p><b>J. Imaging, Vol. 12, Pages 97: A Deep Learning-Based Correction for Scanning Radius Errors in Circular-Scan Photoacoustic Tomography</b></p>
	<p>Journal of Imaging <a href="https://www.mdpi.com/2313-433X/12/3/97">doi: 10.3390/jimaging12030097</a></p>
	<p>Authors:
		Jie Yin
		Yingjie Feng
		Junjun He
		Min Xie
		Chao Tao
		</p>
	<p>Circular-Scan photoacoustic tomography (PAT) can provide high-resolution images of optical absorption, but its analytical reconstructions, such as delay-and-sum (DAS), are highly sensitive to scanning radius (SR) inaccuracies, which cause severe geometric distortions and artifacts. In this work, we propose a deep learning framework, termed smooth deconvolution ResNet (SD-ResNet), to correct DAS reconstruction degradation induced by SR errors. SD-ResNet uses an ImageNet-pretrained ResNet-50 encoder and a lightweight deconvolutional decoder with additional smoothing convolutions to suppress checkerboard artifacts and restore fine structural details. A paired training dataset is generated using k-Wave simulations driven by human thoracic computed tomography (CT) slices: for each phantom, radiofrequency data are simulated once, and DAS images reconstructed with the true SR serve as ground truth, whereas images reconstructed with biased SR values serve as inputs. This design provides structurally diverse training samples and enhances generalization. In silico experiments show that SD-ResNet effectively recovers image quality across a range of SR deviations. Phantom experiments with polyethylene microspheres further confirm that the proposed method can substantially reduce artifacts and recover correct source shapes under practical SR mismatches, offering a robust tool for SR-error-resilient PAT imaging.</p>
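	<p>Delay-and-sum back-projects each channel along delays fixed by the scanning radius, which is why a biased radius distorts the reconstruction. The NumPy sketch below illustrates a basic circular-scan DAS; the geometry, sampling rate, and sound speed are assumptions for illustration, not the paper's acquisition setup.</p>
	<pre><code>
# Illustrative sketch of circular-scan delay-and-sum (DAS): each pixel sums
# the recorded signals at the time-of-flight from that pixel to every sensor
# position on a circle of radius R. Sensor count, sampling rate, sound speed
# and grid size are assumptions for illustration, not the paper's setup.
import numpy as np

def das_reconstruct(rf, radius, n_sensors, fs, c, grid, extent):
    """rf: (n_sensors, n_samples) signals; grid: pixels per side; extent: half-width (m)."""
    xs = np.linspace(-extent, extent, grid)
    X, Y = np.meshgrid(xs, xs)
    angles = np.linspace(0.0, 2.0 * np.pi, n_sensors, endpoint=False)
    image = np.zeros((grid, grid))
    n_samples = rf.shape[1]
    for k, a in enumerate(angles):
        sx, sy = radius * np.cos(a), radius * np.sin(a)
        dist = np.sqrt((X - sx) ** 2 + (Y - sy) ** 2)     # pixel-to-sensor distance
        idx = np.clip(np.round(dist / c * fs).astype(int), 0, n_samples - 1)
        image += rf[k, idx]                               # back-project this channel
    return image

# Passing a biased radius (e.g. radius * 1.02) to das_reconstruct shifts
# every delay, which blurs or displaces the reconstructed sources.
</code></pre>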
	]]></content:encoded>

	<dc:title>A Deep Learning-Based Correction for Scanning Radius Errors in Circular-Scan Photoacoustic Tomography</dc:title>
			<dc:creator>Jie Yin</dc:creator>
			<dc:creator>Yingjie Feng</dc:creator>
			<dc:creator>Junjun He</dc:creator>
			<dc:creator>Min Xie</dc:creator>
			<dc:creator>Chao Tao</dc:creator>
		<dc:identifier>doi: 10.3390/jimaging12030097</dc:identifier>
	<dc:source>Journal of Imaging</dc:source>
	<dc:date>2026-02-25</dc:date>

	<prism:publicationName>Journal of Imaging</prism:publicationName>
	<prism:publicationDate>2026-02-25</prism:publicationDate>
	<prism:volume>12</prism:volume>
	<prism:number>3</prism:number>
	<prism:section>Article</prism:section>
	<prism:startingPage>97</prism:startingPage>
		<prism:doi>10.3390/jimaging12030097</prism:doi>
	<prism:url>https://www.mdpi.com/2313-433X/12/3/97</prism:url>
	
	<cc:license rdf:resource="CC BY 4.0"/>
</item>
        <item rdf:about="https://www.mdpi.com/2313-433X/12/3/96">

	<title>J. Imaging, Vol. 12, Pages 96: Recognition, Localization and 3D Geometric Morphology Calculation of Microblind Holes in Complex Backgrounds Based on the Improved YOLOv11 Network and AVC Algorithm</title>
	<link>https://www.mdpi.com/2313-433X/12/3/96</link>
	<description>Microblind hole processing quality inspection, especially accurately identifying microblind hole contour features and precisely detecting 3D and morphological parameters, has always been challenging, especially for accurately identifying those of different sizes, depths, and contour features simultaneously. This poses a great challenge for identifying and localizing microblind hole contours based on machine vision and accurately calculating three-dimensional parameters. This study takes cigarette microblind holes (diameter of 0.1&amp;amp;ndash;0.2 mm, depth of approximately 35 &amp;amp;micro;m) as the research object. It focuses on solving two major challenges: recognizing and localizing microblind hole contours in complex texture backgrounds and accurately calculating their 3D geometric morphology. An improved YOLOv11s model is proposed for microblind hole image multiobject detection with complex texture backgrounds to extract their features completely. An Area&amp;amp;ndash;Volume Computation (AVC) algorithm, which utilizes discrete integral estimation and curve-fitting principles, is also proposed for computing their surface area and volume. The experimental results show that the precision, recall, mAP@0.5, mAP@0.5:0.95, and prediction time of the improved YOLOv11 network are 0.915, 0.948, 0.925, 0.615, and 1.27 ms, respectively. The relative errors (REs) of the surface area and volume calculation of the microblind holes are 5.236% and 3.964%, respectively. The proposed method achieves microblind hole recognition, localization and 3D morphology calculation accuracy, meeting cigarette on-site inspection criteria. Additionally, a reference for detecting other similar objects in complex texture backgrounds and accurately calculating 3D tasks is provided.</description>
	<pubDate>2026-02-24</pubDate>

	<content:encoded><![CDATA[
	<p><b>J. Imaging, Vol. 12, Pages 96: Recognition, Localization and 3D Geometric Morphology Calculation of Microblind Holes in Complex Backgrounds Based on the Improved YOLOv11 Network and AVC Algorithm</b></p>
	<p>Journal of Imaging <a href="https://www.mdpi.com/2313-433X/12/3/96">doi: 10.3390/jimaging12030096</a></p>
	<p>Authors:
		Chengfen Zhang
		Dong Xia
		Ruizhao Chen
		Qunfeng Niu
		Tao Wang
		Li Wang
		</p>
	<p>Microblind hole processing quality inspection, especially accurately identifying microblind hole contour features and precisely detecting 3D and morphological parameters, has always been challenging, especially for accurately identifying those of different sizes, depths, and contour features simultaneously. This poses a great challenge for identifying and localizing microblind hole contours based on machine vision and accurately calculating three-dimensional parameters. This study takes cigarette microblind holes (diameter of 0.1&amp;amp;ndash;0.2 mm, depth of approximately 35 &amp;amp;micro;m) as the research object. It focuses on solving two major challenges: recognizing and localizing microblind hole contours in complex texture backgrounds and accurately calculating their 3D geometric morphology. An improved YOLOv11s model is proposed for microblind hole image multiobject detection with complex texture backgrounds to extract their features completely. An Area&amp;amp;ndash;Volume Computation (AVC) algorithm, which utilizes discrete integral estimation and curve-fitting principles, is also proposed for computing their surface area and volume. The experimental results show that the precision, recall, mAP@0.5, mAP@0.5:0.95, and prediction time of the improved YOLOv11 network are 0.915, 0.948, 0.925, 0.615, and 1.27 ms, respectively. The relative errors (REs) of the surface area and volume calculation of the microblind holes are 5.236% and 3.964%, respectively. The proposed method achieves microblind hole recognition, localization and 3D morphology calculation accuracy, meeting cigarette on-site inspection criteria. Additionally, a reference for detecting other similar objects in complex texture backgrounds and accurately calculating 3D tasks is provided.</p>
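	<p>The surface-area and volume estimation described above rests on discrete integration over a measured depth map. The sketch below illustrates that general idea on a synthetic paraboloid hole; the pixel size, hole shape, and gradient-based area formula are assumptions and do not reproduce the paper's AVC algorithm.</p>
	<pre><code>
# Illustrative sketch of discrete integral estimation of the volume and
# surface area of a small blind hole from a depth map z(x, y). The synthetic
# depth map, pixel size, and hole shape below are assumptions used only to
# demonstrate the integration idea, not the paper's AVC algorithm.
import numpy as np

px = 2.0e-6                                   # pixel size in metres (assumed)
n = 200
x = (np.arange(n) - n / 2) * px
X, Y = np.meshgrid(x, x)
r = np.sqrt(X ** 2 + Y ** 2)
depth = np.where(r > 75e-6, 0.0, 35e-6 * (1.0 - (r / 75e-6) ** 2))  # paraboloid hole

# Volume: sum of depth values times the area of one pixel.
volume = depth.sum() * px * px

# Surface area: per-pixel patch area sqrt(1 + |grad z|^2) times pixel area.
gy, gx = np.gradient(depth, px)
area = np.sqrt(1.0 + gx ** 2 + gy ** 2).sum() * px * px

print(f"volume = {volume:.3e} m^3, surface area = {area:.3e} m^2")
</code></pre>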
	]]></content:encoded>

	<dc:title>Recognition, Localization and 3D Geometric Morphology Calculation of Microblind Holes in Complex Backgrounds Based on the Improved YOLOv11 Network and AVC Algorithm</dc:title>
			<dc:creator>Chengfen Zhang</dc:creator>
			<dc:creator>Dong Xia</dc:creator>
			<dc:creator>Ruizhao Chen</dc:creator>
			<dc:creator>Qunfeng Niu</dc:creator>
			<dc:creator>Tao Wang</dc:creator>
			<dc:creator>Li Wang</dc:creator>
		<dc:identifier>doi: 10.3390/jimaging12030096</dc:identifier>
	<dc:source>Journal of Imaging</dc:source>
	<dc:date>2026-02-24</dc:date>

	<prism:publicationName>Journal of Imaging</prism:publicationName>
	<prism:publicationDate>2026-02-24</prism:publicationDate>
	<prism:volume>12</prism:volume>
	<prism:number>3</prism:number>
	<prism:section>Article</prism:section>
	<prism:startingPage>96</prism:startingPage>
		<prism:doi>10.3390/jimaging12030096</prism:doi>
	<prism:url>https://www.mdpi.com/2313-433X/12/3/96</prism:url>
	
	<cc:license rdf:resource="CC BY 4.0"/>
</item>
        <item rdf:about="https://www.mdpi.com/2313-433X/12/3/95">

	<title>J. Imaging, Vol. 12, Pages 95: Hybrid MICO-LAC Segmentation with Panoptic Tumor Instance Analysis for Dense Breast Mammograms</title>
	<link>https://www.mdpi.com/2313-433X/12/3/95</link>
	<description>This study proposes a clinically driven hybrid segmentation framework for dense breast tissue analysis in mammographic images, addressing persistent challenges associated with intensity inhomogeneity, low-contrast, and complex tumor morphology. The framework integrates Multiplicative Intrinsic Component Optimization (MICO_2D) for bias field correction, followed by a distance-regularized multiphase Vese&amp;amp;ndash;Chan level-set model for coarse global tumor segmentation. To achieve precise boundary delineation, a localized refinement stage is employed using Localized Active Contours (LAC) with Local Image Fitting (LIF) energy, supported by Gaussian regularization to ensure smooth and coherent boundaries in regions with ambiguous tissue transitions. Building upon the refined semantic tumor mask, the framework further incorporates a panoptic-style tumor instance segmentation stage, enabling the decomposition of connected tumor regions into distinct anatomical instances, which were evaluated on both MIAS and INBreast mammography datasets to demonstrate generalizability. This extension facilitates detailed structural analysis of tumor multiplicity and spatial organization, enhancing interpretability beyond conventional pixel wise segmentation. Experiments conducted on Cranio-Caudal (CC) and Medio-Lateral Oblique (MLO) mammographic views demonstrate competitive performance relative to baseline U-Net and advanced deep learning fusion architectures, including multi-scale and multi-view networks, while offering improved interpretability and robustness. Quantitative evaluation using overlap-related metrics shows strong spatial agreement between predicted and reference segmentations, with per-image Dice Similarity Coefficient (DSC) and Intersection over Union (IoU) distributions reported to ensure reproducibility. Descriptive per-image analysis, supported by bootstrap-based confidence intervals and paired comparisons, indicates consistent performance improvements across images. Robustness analysis under realistic perturbations, including noise, contrast degradation, blur, and rotation, demonstrates stable performance across varying imaging conditions. Furthermore, feature space visualizations using t-SNE and UMAP reveal clear separability between cancerous and non-cancerous tissue regions, highlighting the discriminative capability of the proposed framework. Overall, the results demonstrate the effectiveness, robustness, and clinical motivation of this hybrid panoptic framework for comprehensive dense breast tumor analysis in mammography, while emphasizing reproducibility and conservative statistical assessment.</description>
	<pubDate>2026-02-24</pubDate>

	<content:encoded><![CDATA[
	<p><b>J. Imaging, Vol. 12, Pages 95: Hybrid MICO-LAC Segmentation with Panoptic Tumor Instance Analysis for Dense Breast Mammograms</b></p>
	<p>Journal of Imaging <a href="https://www.mdpi.com/2313-433X/12/3/95">doi: 10.3390/jimaging12030095</a></p>
	<p>Authors:
		Razia Jamil
		Min Dong
		Orken Mamyrbayev
		Ainur Akhmediyarova
		</p>
	<p>This study proposes a clinically driven hybrid segmentation framework for dense breast tissue analysis in mammographic images, addressing persistent challenges associated with intensity inhomogeneity, low-contrast, and complex tumor morphology. The framework integrates Multiplicative Intrinsic Component Optimization (MICO_2D) for bias field correction, followed by a distance-regularized multiphase Vese&amp;amp;ndash;Chan level-set model for coarse global tumor segmentation. To achieve precise boundary delineation, a localized refinement stage is employed using Localized Active Contours (LAC) with Local Image Fitting (LIF) energy, supported by Gaussian regularization to ensure smooth and coherent boundaries in regions with ambiguous tissue transitions. Building upon the refined semantic tumor mask, the framework further incorporates a panoptic-style tumor instance segmentation stage, enabling the decomposition of connected tumor regions into distinct anatomical instances, which were evaluated on both MIAS and INBreast mammography datasets to demonstrate generalizability. This extension facilitates detailed structural analysis of tumor multiplicity and spatial organization, enhancing interpretability beyond conventional pixel wise segmentation. Experiments conducted on Cranio-Caudal (CC) and Medio-Lateral Oblique (MLO) mammographic views demonstrate competitive performance relative to baseline U-Net and advanced deep learning fusion architectures, including multi-scale and multi-view networks, while offering improved interpretability and robustness. Quantitative evaluation using overlap-related metrics shows strong spatial agreement between predicted and reference segmentations, with per-image Dice Similarity Coefficient (DSC) and Intersection over Union (IoU) distributions reported to ensure reproducibility. Descriptive per-image analysis, supported by bootstrap-based confidence intervals and paired comparisons, indicates consistent performance improvements across images. Robustness analysis under realistic perturbations, including noise, contrast degradation, blur, and rotation, demonstrates stable performance across varying imaging conditions. Furthermore, feature space visualizations using t-SNE and UMAP reveal clear separability between cancerous and non-cancerous tissue regions, highlighting the discriminative capability of the proposed framework. Overall, the results demonstrate the effectiveness, robustness, and clinical motivation of this hybrid panoptic framework for comprehensive dense breast tumor analysis in mammography, while emphasizing reproducibility and conservative statistical assessment.</p>
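	<p>The per-image Dice Similarity Coefficient and IoU reported above are overlap metrics between a predicted and a reference binary mask. A minimal sketch is given below; the random masks are placeholders for illustration only.</p>
	<pre><code>
# Illustrative sketch of the per-image overlap metrics named in the abstract:
# Dice Similarity Coefficient (DSC) and Intersection over Union (IoU) between
# a predicted binary tumor mask and a reference mask. Masks are placeholders.
import numpy as np

def dice_and_iou(pred, ref, eps=1e-8):
    pred = pred.astype(bool)
    ref = ref.astype(bool)
    inter = np.logical_and(pred, ref).sum()
    union = np.logical_or(pred, ref).sum()
    dice = (2.0 * inter + eps) / (pred.sum() + ref.sum() + eps)
    iou = (inter + eps) / (union + eps)
    return dice, iou

rng = np.random.default_rng(0)
reference = rng.random((256, 256)) > 0.7
prediction = np.logical_xor(reference, rng.random((256, 256)) > 0.95)
print(dice_and_iou(prediction, reference))
</code></pre>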
	]]></content:encoded>

	<dc:title>Hybrid MICO-LAC Segmentation with Panoptic Tumor Instance Analysis for Dense Breast Mammograms</dc:title>
			<dc:creator>Razia Jamil</dc:creator>
			<dc:creator>Min Dong</dc:creator>
			<dc:creator>Orken Mamyrbayev</dc:creator>
			<dc:creator>Ainur Akhmediyarova</dc:creator>
		<dc:identifier>doi: 10.3390/jimaging12030095</dc:identifier>
	<dc:source>Journal of Imaging</dc:source>
	<dc:date>2026-02-24</dc:date>

	<prism:publicationName>Journal of Imaging</prism:publicationName>
	<prism:publicationDate>2026-02-24</prism:publicationDate>
	<prism:volume>12</prism:volume>
	<prism:number>3</prism:number>
	<prism:section>Article</prism:section>
	<prism:startingPage>95</prism:startingPage>
		<prism:doi>10.3390/jimaging12030095</prism:doi>
	<prism:url>https://www.mdpi.com/2313-433X/12/3/95</prism:url>
	
	<cc:license rdf:resource="CC BY 4.0"/>
</item>
        <item rdf:about="https://www.mdpi.com/2313-433X/12/3/94">

	<title>J. Imaging, Vol. 12, Pages 94: Towards Lightweight and Multi-Scale Scene Classification: A Lie Group-Guided Deep Learning Network with Collaborative Attention</title>
	<link>https://www.mdpi.com/2313-433X/12/3/94</link>
	<description>Remote sensing scene classification (RSSC) plays a crucial role in Earth observation. Current deep learning methods, while accurate, tend to focus on high-level semantic features and overlook complementary shallow details such as edges and textures. Moreover, conventional CNNs are limited by fixed receptive fields, whereas transformers incur high computational costs. To address these limitations, we propose the Lie Group lightweight multi-scale network (LGLMNet), a lightweight multi-scale network that integrates Lie Group covariance features. It employs a dual-branch architecture combining Lie Group machine learning (LGML) for shallow feature extraction and a deep learning branch for high-level semantics. In the deep branch, we design a parallel depthwise separable convolution block (PDSCB) for multi-scale perception and a spatial-channel collaborative attention mechanism (SCCA) for efficient global&amp;amp;ndash;local modeling. A cross-layer feature fusion block (CLFFB) effectively merges the two branches. Compared with state-of-the-art methods, the proposed LGLMNet achieves accuracy improvements of 2.14%, 2.32%, and 1.12% on UCM-21, AID, and NWPU-45 datasets, respectively, while maintaining a lightweight structure with only 2.6 M parameters.</description>
	<pubDate>2026-02-24</pubDate>

	<content:encoded><![CDATA[
	<p><b>J. Imaging, Vol. 12, Pages 94: Towards Lightweight and Multi-Scale Scene Classification: A Lie Group-Guided Deep Learning Network with Collaborative Attention</b></p>
	<p>Journal of Imaging <a href="https://www.mdpi.com/2313-433X/12/3/94">doi: 10.3390/jimaging12030094</a></p>
	<p>Authors:
		Xuefei Xu
		Chengjun Xu
		</p>
	<p>Remote sensing scene classification (RSSC) plays a crucial role in Earth observation. Current deep learning methods, while accurate, tend to focus on high-level semantic features and overlook complementary shallow details such as edges and textures. Moreover, conventional CNNs are limited by fixed receptive fields, whereas transformers incur high computational costs. To address these limitations, we propose the Lie Group lightweight multi-scale network (LGLMNet), a lightweight multi-scale network that integrates Lie Group covariance features. It employs a dual-branch architecture combining Lie Group machine learning (LGML) for shallow feature extraction and a deep learning branch for high-level semantics. In the deep branch, we design a parallel depthwise separable convolution block (PDSCB) for multi-scale perception and a spatial-channel collaborative attention mechanism (SCCA) for efficient global&amp;amp;ndash;local modeling. A cross-layer feature fusion block (CLFFB) effectively merges the two branches. Compared with state-of-the-art methods, the proposed LGLMNet achieves accuracy improvements of 2.14%, 2.32%, and 1.12% on UCM-21, AID, and NWPU-45 datasets, respectively, while maintaining a lightweight structure with only 2.6 M parameters.</p>
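	<p>Depthwise separable convolution, the building block behind the parallel multi-scale design mentioned above, factors a convolution into a per-channel spatial filter followed by a pointwise mixing step. The PyTorch sketch below shows two such branches in parallel; kernel sizes and channel counts are assumptions, not the published PDSCB configuration.</p>
	<pre><code>
# Illustrative sketch of depthwise separable convolutions applied in
# parallel at two kernel sizes, as a rough stand-in for the multi-scale
# idea behind the paper's PDSCB module. Kernel sizes and channel counts
# are assumptions, not the published configuration.
import torch
import torch.nn as nn

class DepthwiseSeparable(nn.Module):
    def __init__(self, ch, kernel):
        super().__init__()
        self.depthwise = nn.Conv2d(ch, ch, kernel, padding=kernel // 2,
                                   groups=ch, bias=False)
        self.pointwise = nn.Conv2d(ch, ch, 1, bias=False)
        self.bn = nn.BatchNorm2d(ch)
        self.act = nn.ReLU(inplace=True)

    def forward(self, x):
        return self.act(self.bn(self.pointwise(self.depthwise(x))))

class ParallelDWSeparable(nn.Module):
    """Two depthwise separable branches (3x3 and 5x5) fused by addition."""
    def __init__(self, ch):
        super().__init__()
        self.branch3 = DepthwiseSeparable(ch, 3)
        self.branch5 = DepthwiseSeparable(ch, 5)

    def forward(self, x):
        return self.branch3(x) + self.branch5(x)

x = torch.randn(1, 32, 56, 56)
print(ParallelDWSeparable(32)(x).shape)   # torch.Size([1, 32, 56, 56])
</code></pre>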
	]]></content:encoded>

	<dc:title>Towards Lightweight and Multi-Scale Scene Classification: A Lie Group-Guided Deep Learning Network with Collaborative Attention</dc:title>
			<dc:creator>Xuefei Xu</dc:creator>
			<dc:creator>Chengjun Xu</dc:creator>
		<dc:identifier>doi: 10.3390/jimaging12030094</dc:identifier>
	<dc:source>Journal of Imaging</dc:source>
	<dc:date>2026-02-24</dc:date>

	<prism:publicationName>Journal of Imaging</prism:publicationName>
	<prism:publicationDate>2026-02-24</prism:publicationDate>
	<prism:volume>12</prism:volume>
	<prism:number>3</prism:number>
	<prism:section>Article</prism:section>
	<prism:startingPage>94</prism:startingPage>
		<prism:doi>10.3390/jimaging12030094</prism:doi>
	<prism:url>https://www.mdpi.com/2313-433X/12/3/94</prism:url>
	
	<cc:license rdf:resource="CC BY 4.0"/>
</item>
        <item rdf:about="https://www.mdpi.com/2313-433X/12/3/93">

	<title>J. Imaging, Vol. 12, Pages 93: LTPNet: Lesion-Aware Triple-Path Feature Fusion Network for Skin Lesion Segmentation</title>
	<link>https://www.mdpi.com/2313-433X/12/3/93</link>
	<description>Skin lesion segmentation has achieved notable progress in recent years; however, accurate delineation remains challenging due to complex backgrounds, ambiguous boundaries, and low lesion-to-skin contrast. To address these issues, we propose the lesion-aware triple-path feature fusion network (LTPNet), an end-to-end framework that progressively processes features through extraction, refinement, and aggregation stages. In the extraction stage, we incorporate a general foreground&amp;amp;ndash;background attention to suppress background interference and accelerate model convergence. In the refinement stage, we introduce an attentive spatial modulator (ASM) to jointly exploit local structural cues and global semantic context for precise spatial modulation. We further develop a lesion-aware lite-gate attention (LALGA) module that performs local spatial feature modulation and global channel recalibration tailored to lesion characteristics. In the aggregation stage, we propose a triple-path feature fusion (TPFF) module that explicitly models feature relationships across scales via three complementary pathways: a common path (CP) for semantic consistency, a saliency path (SP) for highlighting co-activated regions, and a difference path (DP) for accentuating structural discrepancies. Extensive experiments on in-domain and cross-domain datasets show that LTPNet achieves superior segmentation accuracy with reasonable inference efficiency and model complexity, demonstrating its potential for efficient and reliable clinical decision support.</description>
	<pubDate>2026-02-24</pubDate>

	<content:encoded><![CDATA[
	<p><b>J. Imaging, Vol. 12, Pages 93: LTPNet: Lesion-Aware Triple-Path Feature Fusion Network for Skin Lesion Segmentation</b></p>
	<p>Journal of Imaging <a href="https://www.mdpi.com/2313-433X/12/3/93">doi: 10.3390/jimaging12030093</a></p>
	<p>Authors:
		Yange Sun
		Sen Chen
		Huaping Guo
		Li Zhang
		Hongzhou Yue
		Yan Feng
		</p>
	<p>Skin lesion segmentation has achieved notable progress in recent years; however, accurate delineation remains challenging due to complex backgrounds, ambiguous boundaries, and low lesion-to-skin contrast. To address these issues, we propose the lesion-aware triple-path feature fusion network (LTPNet), an end-to-end framework that progressively processes features through extraction, refinement, and aggregation stages. In the extraction stage, we incorporate a general foreground&amp;amp;ndash;background attention to suppress background interference and accelerate model convergence. In the refinement stage, we introduce an attentive spatial modulator (ASM) to jointly exploit local structural cues and global semantic context for precise spatial modulation. We further develop a lesion-aware lite-gate attention (LALGA) module that performs local spatial feature modulation and global channel recalibration tailored to lesion characteristics. In the aggregation stage, we propose a triple-path feature fusion (TPFF) module that explicitly models feature relationships across scales via three complementary pathways: a common path (CP) for semantic consistency, a saliency path (SP) for highlighting co-activated regions, and a difference path (DP) for accentuating structural discrepancies. Extensive experiments on in-domain and cross-domain datasets show that LTPNet achieves superior segmentation accuracy with reasonable inference efficiency and model complexity, demonstrating its potential for efficient and reliable clinical decision support.</p>
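	<p>The triple-path fusion described above combines two feature streams through common, saliency, and difference pathways. The sketch below gives one plausible reading of that idea, assuming the common path averages the streams, the saliency path takes their element-wise maximum, and the difference path takes their absolute difference; these operators and the 1x1 merge are assumptions, not LTPNet's published TPFF definition.</p>
	<pre><code>
# Illustrative sketch of a triple-path fusion of two feature maps. The
# concrete operators (mean for the common path, element-wise max for the
# saliency path, absolute difference for the difference path, 1x1 conv to
# merge) are assumptions for illustration, not LTPNet's published TPFF.
import torch
import torch.nn as nn

class TriplePathFusion(nn.Module):
    def __init__(self, ch):
        super().__init__()
        self.merge = nn.Conv2d(3 * ch, ch, kernel_size=1)

    def forward(self, a, b):
        common = 0.5 * (a + b)              # shared, semantically consistent content
        saliency = torch.maximum(a, b)      # regions co-activated in either stream
        difference = torch.abs(a - b)       # structural discrepancies between streams
        return self.merge(torch.cat([common, saliency, difference], dim=1))

a = torch.randn(1, 64, 32, 32)
b = torch.randn(1, 64, 32, 32)
print(TriplePathFusion(64)(a, b).shape)   # torch.Size([1, 64, 32, 32])
</code></pre>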
	]]></content:encoded>

	<dc:title>LTPNet: Lesion-Aware Triple-Path Feature Fusion Network for Skin Lesion Segmentation</dc:title>
			<dc:creator>Yange Sun</dc:creator>
			<dc:creator>Sen Chen</dc:creator>
			<dc:creator>Huaping Guo</dc:creator>
			<dc:creator>Li Zhang</dc:creator>
			<dc:creator>Hongzhou Yue</dc:creator>
			<dc:creator>Yan Feng</dc:creator>
		<dc:identifier>doi: 10.3390/jimaging12030093</dc:identifier>
	<dc:source>Journal of Imaging</dc:source>
	<dc:date>2026-02-24</dc:date>

	<prism:publicationName>Journal of Imaging</prism:publicationName>
	<prism:publicationDate>2026-02-24</prism:publicationDate>
	<prism:volume>12</prism:volume>
	<prism:number>3</prism:number>
	<prism:section>Article</prism:section>
	<prism:startingPage>93</prism:startingPage>
		<prism:doi>10.3390/jimaging12030093</prism:doi>
	<prism:url>https://www.mdpi.com/2313-433X/12/3/93</prism:url>
	
	<cc:license rdf:resource="CC BY 4.0"/>
</item>
        <item rdf:about="https://www.mdpi.com/2313-433X/12/3/92">

	<title>J. Imaging, Vol. 12, Pages 92: UVSegNet: Semantic Boundary-Aware Neural UV Parameterization for Man-Made Objects</title>
	<link>https://www.mdpi.com/2313-433X/12/3/92</link>
	<description>UV parameterization is a fundamental step in building textured 3D models, but minimizing texture distortion and ensuring seams are placed along meaningful boundaries remains a challenge. This paper proposes UVSegNet, a novel semantic boundary-aware UV parameterization framework that combines part-level segmentation with geometry-aware parameterization. To address the common seam placement issues in parameterization, we introduce a boundary-aware guided UV mapping module that jointly optimizes geometric accuracy and seam layout. Furthermore, to better handle the cylindrical structures common in man-made objects, we introduce a cylindrical supervision strategy to reduce misalignment and unfolding distortion. Experiments on representative object categories show that UVSegNet outperforms other excellent baseline models in both texture quality and seam quality. Compared to Nuvo, UVSegNet improves the angular distortion (conformality) metric by 24.1% and seam compactness by 60.5% by generating a more compact seam layout. Experimental results demonstrate that UVSegNet outperforms baseline methods in both mapping quality and seam quality, thanks to the complementary mechanism of boundary constraints and geometry-driven modeling.</description>
	<pubDate>2026-02-24</pubDate>

	<content:encoded><![CDATA[
	<p><b>J. Imaging, Vol. 12, Pages 92: UVSegNet: Semantic Boundary-Aware Neural UV Parameterization for Man-Made Objects</b></p>
	<p>Journal of Imaging <a href="https://www.mdpi.com/2313-433X/12/3/92">doi: 10.3390/jimaging12030092</a></p>
	<p>Authors:
		Hairun Zhang
		Ying Song
		</p>
	<p>UV parameterization is a fundamental step in building textured 3D models, but minimizing texture distortion and ensuring seams are placed along meaningful boundaries remains a challenge. This paper proposes UVSegNet, a novel semantic boundary-aware UV parameterization framework that combines part-level segmentation with geometry-aware parameterization. To address the common seam placement issues in parameterization, we introduce a boundary-aware guided UV mapping module that jointly optimizes geometric accuracy and seam layout. Furthermore, to better handle the cylindrical structures common in man-made objects, we introduce a cylindrical supervision strategy to reduce misalignment and unfolding distortion. Experiments on representative object categories show that UVSegNet outperforms other excellent baseline models in both texture quality and seam quality. Compared to Nuvo, UVSegNet improves the angular distortion (conformality) metric by 24.1% and seam compactness by 60.5% by generating a more compact seam layout. Experimental results demonstrate that UVSegNet outperforms baseline methods in both mapping quality and seam quality, thanks to the complementary mechanism of boundary constraints and geometry-driven modeling.</p>
	]]></content:encoded>

	<dc:title>UVSegNet: Semantic Boundary-Aware Neural UV Parameterization for Man-Made Objects</dc:title>
			<dc:creator>Hairun Zhang</dc:creator>
			<dc:creator>Ying Song</dc:creator>
		<dc:identifier>doi: 10.3390/jimaging12030092</dc:identifier>
	<dc:source>Journal of Imaging</dc:source>
	<dc:date>2026-02-24</dc:date>

	<prism:publicationName>Journal of Imaging</prism:publicationName>
	<prism:publicationDate>2026-02-24</prism:publicationDate>
	<prism:volume>12</prism:volume>
	<prism:number>3</prism:number>
	<prism:section>Article</prism:section>
	<prism:startingPage>92</prism:startingPage>
		<prism:doi>10.3390/jimaging12030092</prism:doi>
	<prism:url>https://www.mdpi.com/2313-433X/12/3/92</prism:url>
	
	<cc:license rdf:resource="CC BY 4.0"/>
</item>
        <item rdf:about="https://www.mdpi.com/2313-433X/12/2/91">

	<title>J. Imaging, Vol. 12, Pages 91: Accelerating Point Cloud Computation via Memory in Embedded Structured Light Cameras</title>
	<link>https://www.mdpi.com/2313-433X/12/2/91</link>
	<description>Embedded structured light cameras have been widely applied in various fields. However, due to constraints such as insufficient computing resources, it remains difficult to achieve high-speed structured light point cloud computation. To address this issue, this study proposes a memory-driven computational framework for accelerating point cloud computation. Specifically, the point cloud computation process is precomputed as much as possible and stored in memory in the form of parameters, thereby significantly reducing the computational load during actual point cloud computation. The framework is instantiated in two forms: a low-memory method that minimizes memory footprint at the expense of point cloud stability, and a high-memory method that preserves the nonlinear phase–distance relation via an extensive lookup table. Experimental evaluations demonstrate that the proposed methods achieve comparable accuracy to the conventional method while delivering substantial speedups, and data-format optimizations further reduce required bandwidth. This framework offers a generalizable paradigm for optimizing structured light pipelines, paving the way for enhanced real-time 3D sensing in embedded applications.</description>
	<pubDate>2026-02-21</pubDate>

	<content:encoded><![CDATA[
	<p><b>J. Imaging, Vol. 12, Pages 91: Accelerating Point Cloud Computation via Memory in Embedded Structured Light Cameras</b></p>
	<p>Journal of Imaging <a href="https://www.mdpi.com/2313-433X/12/2/91">doi: 10.3390/jimaging12020091</a></p>
	<p>Authors:
		Yanan Zhang
		Shikang Meng
		Shijie Wang
		Yaheng Ren
		</p>
	<p>Embedded structured light cameras have been widely applied in various fields. However, due to constraints such as insufficient computing resources, it remains difficult to achieve high-speed structured light point cloud computation. To address this issue, this study proposes a memory-driven computational framework for accelerating point cloud computation. Specifically, the point cloud computation process is precomputed as much as possible and stored in memory in the form of parameters, thereby significantly reducing the computational load during actual point cloud computation. The framework is instantiated in two forms: a low-memory method that minimizes memory footprint at the expense of point cloud stability, and a high-memory method that preserves the nonlinear phase–distance relation via an extensive lookup table. Experimental evaluations demonstrate that the proposed methods achieve comparable accuracy to the conventional method while delivering substantial speedups, and data-format optimizations further reduce required bandwidth. This framework offers a generalizable paradigm for optimizing structured light pipelines, paving the way for enhanced real-time 3D sensing in embedded applications.</p>
	]]></content:encoded>

	<dc:title>Accelerating Point Cloud Computation via Memory in Embedded Structured Light Cameras</dc:title>
			<dc:creator>Yanan Zhang</dc:creator>
			<dc:creator>Shikang Meng</dc:creator>
			<dc:creator>Shijie Wang</dc:creator>
			<dc:creator>Yaheng Ren</dc:creator>
		<dc:identifier>doi: 10.3390/jimaging12020091</dc:identifier>
	<dc:source>Journal of Imaging</dc:source>
	<dc:date>2026-02-21</dc:date>

	<prism:publicationName>Journal of Imaging</prism:publicationName>
	<prism:publicationDate>2026-02-21</prism:publicationDate>
	<prism:volume>12</prism:volume>
	<prism:number>2</prism:number>
	<prism:section>Article</prism:section>
	<prism:startingPage>91</prism:startingPage>
		<prism:doi>10.3390/jimaging12020091</prism:doi>
	<prism:url>https://www.mdpi.com/2313-433X/12/2/91</prism:url>
	
	<cc:license rdf:resource="CC BY 4.0"/>
</item>
        <item rdf:about="https://www.mdpi.com/2313-433X/12/2/90">

	<title>J. Imaging, Vol. 12, Pages 90: MDF2Former: Multi-Scale Dual-Domain Feature Fusion Transformer for Hyperspectral Image Classification of Bacteria in Murine Wounds</title>
	<link>https://www.mdpi.com/2313-433X/12/2/90</link>
	<description>Bacterial wound infection poses a major challenge in trauma care and can lead to severe complications such as sepsis and organ failure. Therefore, rapid and accurate identification of the pathogen, along with targeted intervention, is of vital importance for improving treatment outcomes and reducing risks. However, current detection methods are still constrained by procedural complexity and long processing times. In this study, a hyperspectral imaging (HSI) acquisition system for bacterial analysis and a multi-scale dual-domain feature fusion transformer (MDF2Former) were developed for classifying wound bacteria. MDF2Former integrates three modules: a multi-scale feature enhancement and fusion module that generates tokens with multi-scale discriminative representations, a spatial–spectral dual-branch attention module that strengthens joint feature modeling, and a frequency and spatial–spectral domain encoding module that captures global and local interactions among tokens through a hierarchical stacking structure, thereby enabling more efficient feature learning. Extensive experiments on our self-constructed HSI dataset of typical wound bacteria demonstrate that MDF2Former achieved outstanding performance across five metrics: Accuracy (91.94%), Precision (92.26%), Recall (91.94%), F1-score (92.01%), and Kappa coefficient (90.73%), surpassing all comparative models. These results verify the effectiveness of combining HSI with deep learning for bacterial identification and highlight its potential to assist in identifying bacterial species and making personalized treatment decisions for wound infections.</description>
	<pubDate>2026-02-19</pubDate>

	<content:encoded><![CDATA[
	<p><b>J. Imaging, Vol. 12, Pages 90: MDF2Former: Multi-Scale Dual-Domain Feature Fusion Transformer for Hyperspectral Image Classification of Bacteria in Murine Wounds</b></p>
	<p>Journal of Imaging <a href="https://www.mdpi.com/2313-433X/12/2/90">doi: 10.3390/jimaging12020090</a></p>
	<p>Authors:
		Decheng Wu
		Wendan Liu
		Rui Li
		Xudong Fu
		Lin Tao
		Yinli Tian
		Anqiang Zhang
		Zhen Wang
		Hao Tang
		</p>
	<p>Bacterial wound infection poses a major challenge in trauma care and can lead to severe complications such as sepsis and organ failure. Therefore, rapid and accurate identification of the pathogen, along with targeted intervention, is of vital importance for improving treatment outcomes and reducing risks. However, current detection methods are still constrained by procedural complexity and long processing times. In this study, a hyperspectral imaging (HSI) acquisition system for bacterial analysis and a multi-scale dual-domain feature fusion transformer (MDF2Former) were developed for classifying wound bacteria. MDF2Former integrates three modules: a multi-scale feature enhancement and fusion module that generates tokens with multi-scale discriminative representations, a spatial–spectral dual-branch attention module that strengthens joint feature modeling, and a frequency and spatial–spectral domain encoding module that captures global and local interactions among tokens through a hierarchical stacking structure, thereby enabling more efficient feature learning. Extensive experiments on our self-constructed HSI dataset of typical wound bacteria demonstrate that MDF2Former achieved outstanding performance across five metrics: Accuracy (91.94%), Precision (92.26%), Recall (91.94%), F1-score (92.01%), and Kappa coefficient (90.73%), surpassing all comparative models. These results verify the effectiveness of combining HSI with deep learning for bacterial identification and highlight its potential to assist in identifying bacterial species and making personalized treatment decisions for wound infections.</p>
	]]></content:encoded>

	<dc:title>MDF2Former: Multi-Scale Dual-Domain Feature Fusion Transformer for Hyperspectral Image Classification of Bacteria in Murine Wounds</dc:title>
			<dc:creator>Decheng Wu</dc:creator>
			<dc:creator>Wendan Liu</dc:creator>
			<dc:creator>Rui Li</dc:creator>
			<dc:creator>Xudong Fu</dc:creator>
			<dc:creator>Lin Tao</dc:creator>
			<dc:creator>Yinli Tian</dc:creator>
			<dc:creator>Anqiang Zhang</dc:creator>
			<dc:creator>Zhen Wang</dc:creator>
			<dc:creator>Hao Tang</dc:creator>
		<dc:identifier>doi: 10.3390/jimaging12020090</dc:identifier>
	<dc:source>Journal of Imaging</dc:source>
	<dc:date>2026-02-19</dc:date>

	<prism:publicationName>Journal of Imaging</prism:publicationName>
	<prism:publicationDate>2026-02-19</prism:publicationDate>
	<prism:volume>12</prism:volume>
	<prism:number>2</prism:number>
	<prism:section>Article</prism:section>
	<prism:startingPage>90</prism:startingPage>
		<prism:doi>10.3390/jimaging12020090</prism:doi>
	<prism:url>https://www.mdpi.com/2313-433X/12/2/90</prism:url>
	
	<cc:license rdf:resource="CC BY 4.0"/>
</item>
        <item rdf:about="https://www.mdpi.com/2313-433X/12/2/89">

	<title>J. Imaging, Vol. 12, Pages 89: Classification of the Surrounding Rock Based on Image Processing Analysis and Transfer Learning</title>
	<link>https://www.mdpi.com/2313-433X/12/2/89</link>
	<description>Standardized methods for classifying surrounding rock are currently insufficient, and classification relies mainly on the subjective judgment of technicians, leading to inconsistent evaluation results. This study develops feature extraction and classification methods for surrounding rock images from a tunnel of the Central Yunnan Water Diversion Project using image processing analysis and transfer learning. A rich set of surrounding rock images and water conservancy tunnel data is collected, and the surrounding rock is classified relatively accurately according to the code and expert guidance. By introducing fractal theory, the complexity and irregularity of the spatial distribution of weak layers and joints on the surrounding rock surface are revealed effectively, and a classification method for surrounding rock based on changes in the fractal dimension characteristic values is proposed. By combining the quantified parameters of surrounding rock images with strength data collected by rebound meters, a method for correcting surrounding rock strength based on image analysis is proposed, which effectively mitigates the error caused by the uneven distribution of rock masses in traditional rebound meter strength values. After correction, more accurate strength characteristics are obtained, supporting the standardized classification of the surrounding rock. Finally, a transfer learning model is constructed to achieve rapid classification of tunnel surrounding rock images. This research provides support for the standardized classification of tunnel surrounding rock.</description>
	<pubDate>2026-02-19</pubDate>

	<content:encoded><![CDATA[
	<p><b>J. Imaging, Vol. 12, Pages 89: Classification of the Surrounding Rock Based on Image Processing Analysis and Transfer Learning</b></p>
	<p>Journal of Imaging <a href="https://www.mdpi.com/2313-433X/12/2/89">doi: 10.3390/jimaging12020089</a></p>
	<p>Authors:
		Yanyun Fan
		Jiaqi Zhu
		Hua Luo
		Yaxi Shen
		Shuanglong Wang
		Xiaoning Liu
		Dong Li
		Chuhan Deng
		</p>
	<p>Standardized methods for classifying surrounding rock are currently insufficient, and classification relies mainly on the subjective judgment of technicians, leading to inconsistent evaluation results. This study develops feature extraction and classification methods for surrounding rock images from a tunnel of the Central Yunnan Water Diversion Project using image processing analysis and transfer learning. A rich set of surrounding rock images and water conservancy tunnel data is collected, and the surrounding rock is classified relatively accurately according to the code and expert guidance. By introducing fractal theory, the complexity and irregularity of the spatial distribution of weak layers and joints on the surrounding rock surface are revealed effectively, and a classification method for surrounding rock based on changes in the fractal dimension characteristic values is proposed. By combining the quantified parameters of surrounding rock images with strength data collected by rebound meters, a method for correcting surrounding rock strength based on image analysis is proposed, which effectively mitigates the error caused by the uneven distribution of rock masses in traditional rebound meter strength values. After correction, more accurate strength characteristics are obtained, supporting the standardized classification of the surrounding rock. Finally, a transfer learning model is constructed to achieve rapid classification of tunnel surrounding rock images. This research provides support for the standardized classification of tunnel surrounding rock.</p>
	]]></content:encoded>

	<dc:title>Classification of the Surrounding Rock Based on Image Processing Analysis and Transfer Learning</dc:title>
			<dc:creator>Yanyun Fan</dc:creator>
			<dc:creator>Jiaqi Zhu</dc:creator>
			<dc:creator>Hua Luo</dc:creator>
			<dc:creator>Yaxi Shen</dc:creator>
			<dc:creator>Shuanglong Wang</dc:creator>
			<dc:creator>Xiaoning Liu</dc:creator>
			<dc:creator>Dong Li</dc:creator>
			<dc:creator>Chuhan Deng</dc:creator>
		<dc:identifier>doi: 10.3390/jimaging12020089</dc:identifier>
	<dc:source>Journal of Imaging</dc:source>
	<dc:date>2026-02-19</dc:date>

	<prism:publicationName>Journal of Imaging</prism:publicationName>
	<prism:publicationDate>2026-02-19</prism:publicationDate>
	<prism:volume>12</prism:volume>
	<prism:number>2</prism:number>
	<prism:section>Article</prism:section>
	<prism:startingPage>89</prism:startingPage>
		<prism:doi>10.3390/jimaging12020089</prism:doi>
	<prism:url>https://www.mdpi.com/2313-433X/12/2/89</prism:url>
	
	<cc:license rdf:resource="CC BY 4.0"/>
</item>
        <item rdf:about="https://www.mdpi.com/2313-433X/12/2/88">

	<title>J. Imaging, Vol. 12, Pages 88: Analysis of Biological Images and Quantitative Monitoring Using Deep Learning and Computer Vision</title>
	<link>https://www.mdpi.com/2313-433X/12/2/88</link>
	<description>Automated biological counting is essential for scaling wildlife monitoring and biodiversity assessments, as manual processing currently limits analytical effort and scalability. This review evaluates the integration of deep learning and computer vision across diverse acquisition platforms, including camera traps, unmanned aerial vehicles (UAVs), and remote sensing. Methodological paradigms ranging from Convolutional Neural Networks (CNNs) and one-stage detectors like You Only Look Once (YOLO) to recent transformer-based architectures and hybrid models are examined. The literature shows that these methods consistently achieve high accuracy, often exceeding 95%, across various taxa, including insect pests, aquatic organisms, terrestrial vegetation, and forest ecosystems. However, persistent challenges such as object occlusion, cryptic species differentiation, and the scarcity of high-quality, labeled datasets continue to hinder fully automated workflows. We conclude that while automated counting has fundamentally increased data throughput, future advancements must focus on enhancing model generalization through self-supervised learning and improved data augmentation techniques. These developments are critical for transitioning from experimental models to robust, operational tools for global ecological monitoring and conservation efforts.</description>
	<pubDate>2026-02-18</pubDate>

	<content:encoded><![CDATA[
	<p><b>J. Imaging, Vol. 12, Pages 88: Analysis of Biological Images and Quantitative Monitoring Using Deep Learning and Computer Vision</b></p>
	<p>Journal of Imaging <a href="https://www.mdpi.com/2313-433X/12/2/88">doi: 10.3390/jimaging12020088</a></p>
	<p>Authors:
		Aaron Gálvez-Salido
		Francisca Robles
		Rodrigo J. Gonçalves
		Roberto de la Herrán
		Carmelo Ruiz Rejón
		Rafael Navajas-Pérez
		</p>
	<p>Automated biological counting is essential for scaling wildlife monitoring and biodiversity assessments, as manual processing currently limits analytical effort and scalability. This review evaluates the integration of deep learning and computer vision across diverse acquisition platforms, including camera traps, unmanned aerial vehicles (UAVs), and remote sensing. Methodological paradigms ranging from Convolutional Neural Networks (CNNs) and one-stage detectors like You Only Look Once (YOLO) to recent transformer-based architectures and hybrid models are examined. The literature shows that these methods consistently achieve high accuracy, often exceeding 95%, across various taxa, including insect pests, aquatic organisms, terrestrial vegetation, and forest ecosystems. However, persistent challenges such as object occlusion, cryptic species differentiation, and the scarcity of high-quality, labeled datasets continue to hinder fully automated workflows. We conclude that while automated counting has fundamentally increased data throughput, future advancements must focus on enhancing model generalization through self-supervised learning and improved data augmentation techniques. These developments are critical for transitioning from experimental models to robust, operational tools for global ecological monitoring and conservation efforts.</p>
	]]></content:encoded>

	<dc:title>Analysis of Biological Images and Quantitative Monitoring Using Deep Learning and Computer Vision</dc:title>
			<dc:creator>Aaron Gálvez-Salido</dc:creator>
			<dc:creator>Francisca Robles</dc:creator>
			<dc:creator>Rodrigo J. Gonçalves</dc:creator>
			<dc:creator>Roberto de la Herrán</dc:creator>
			<dc:creator>Carmelo Ruiz Rejón</dc:creator>
			<dc:creator>Rafael Navajas-Pérez</dc:creator>
		<dc:identifier>doi: 10.3390/jimaging12020088</dc:identifier>
	<dc:source>Journal of Imaging</dc:source>
	<dc:date>2026-02-18</dc:date>

	<prism:publicationName>Journal of Imaging</prism:publicationName>
	<prism:publicationDate>2026-02-18</prism:publicationDate>
	<prism:volume>12</prism:volume>
	<prism:number>2</prism:number>
	<prism:section>Review</prism:section>
	<prism:startingPage>88</prism:startingPage>
		<prism:doi>10.3390/jimaging12020088</prism:doi>
	<prism:url>https://www.mdpi.com/2313-433X/12/2/88</prism:url>
	
	<cc:license rdf:resource="CC BY 4.0"/>
</item>
        <item rdf:about="https://www.mdpi.com/2313-433X/12/2/87">

	<title>J. Imaging, Vol. 12, Pages 87: Automated Compactness Quantitative Metrics for Wrist Bone on Conventional Radiography in Rheumatoid Arthritis: A Clinical Evaluation Study</title>
	<link>https://www.mdpi.com/2313-433X/12/2/87</link>
	<description>Rheumatoid arthritis (RA) frequently affects the joints of the hands, with joint space narrowing (JSN) representing an important early marker of structural damage. The semi-quantitative Sharp/van der Heijde (SvdH) scoring system is widely used in clinical practice but is inherently subjective and susceptible to observer variability. Moreover, the complex anatomy of the wrist and substantial overlap of carpal bones pose challenges for automated quantitative assessment of wrist JSN on routine radiographs. This study aimed to introduce a novel quantitative assessment perspective and to clinically validate an automated, compactness-related quantification framework for evaluating wrist JSN in RA. This study initially enrolled 51 patients with RA. After excluding one case with severe carpal fusion that precluded anatomical differentiation, 50 patients (44 females and 6 males) were included in the final analysis. The cohort had a mean age of 61 years (range: 21–82), a median symptom duration of 9 years (IQR: 1–32), and a median follow-up interval for bilateral hand radiographs of 1.06 years (IQR: 0.82–1.30). To quantify global wrist JSN, 10 compactness-related metrics were computed based on the spatial distribution of bone centroids extracted from carpal segmentation masks. These metrics were validated against the wrist JSN subscore of the SvdH score (SvdH-JSN_wrist) and the total Sharp score (TSS) as gold standards. Several distance-based metrics among the compactness-related metrics showed significant negative correlations with the wrist joint space narrowing subscore of the Sharp/van der Heijde score (SvdH-JSN_wrist). Specifically, mean-pairwise-distance (MPD), root-mean-square-radius (RMSR), and median-radius (R50) showed moderate to strong correlations (r = −0.52 to −0.63, all p ≤ 0.0001) that were consistent at baseline (BL) and follow-up (FU). Correlations with TSS were weaker overall, with only R50 and its normalized form showing stable negative correlations (r = −0.40 to −0.43, p &lt; 0.01). Longitudinal analyses showed limited correlations between metric changes and clinical score changes. The proposed automated compactness quantification framework enables objective and reliable assessment of wrist JSN on standard radiographs and complements conventional scoring systems by supporting automated and standardized evaluation of RA-related wrist structural changes.</description>
	<pubDate>2026-02-18</pubDate>

	<content:encoded><![CDATA[
	<p><b>J. Imaging, Vol. 12, Pages 87: Automated Compactness Quantitative Metrics for Wrist Bone on Conventional Radiography in Rheumatoid Arthritis: A Clinical Evaluation Study</b></p>
	<p>Journal of Imaging <a href="https://www.mdpi.com/2313-433X/12/2/87">doi: 10.3390/jimaging12020087</a></p>
	<p>Authors:
		Jiajing Zhou
		Junmu Peng
		Haolin Wang
		Hiroshi Kataoka
		Masaya Mukai
		Tunlada Wiriyanukhroh
		Tamotsu Kamishima
		</p>
	<p>Rheumatoid arthritis (RA) frequently affects the joints of the hands, with joint space narrowing (JSN) representing an important early marker of structural damage. The semi-quantitative Sharp/van der Heijde (SvdH) scoring system is widely used in clinical practice but is inherently subjective and susceptible to observer variability. Moreover, the complex anatomy of the wrist and substantial overlap of carpal bones pose challenges for automated quantitative assessment of wrist JSN on routine radiographs. This study aimed to introduce a novel quantitative assessment perspective and to clinically validate an automated, compactness-related quantification framework for evaluating wrist JSN in RA. This study initially enrolled 51 patients with RA. After excluding one case with severe carpal fusion that precluded anatomical differentiation, 50 patients (44 females and 6 males) were included in the final analysis. The cohort had a mean age of 61 years (range: 21–82), a median symptom duration of 9 years (IQR: 1–32), and a median follow-up interval for bilateral hand radiographs of 1.06 years (IQR: 0.82–1.30). To quantify global wrist JSN, 10 compactness-related metrics were computed based on the spatial distribution of bone centroids extracted from carpal segmentation masks. These metrics were validated against the wrist JSN subscore of the SvdH score (SvdH-JSN_wrist) and the total Sharp score (TSS) as gold standards. Several distance-based metrics among the compactness-related metrics showed significant negative correlations with the wrist joint space narrowing subscore of the Sharp/van der Heijde score (SvdH-JSN_wrist). Specifically, mean-pairwise-distance (MPD), root-mean-square-radius (RMSR), and median-radius (R50) showed moderate to strong correlations (r = −0.52 to −0.63, all p ≤ 0.0001) that were consistent at baseline (BL) and follow-up (FU). Correlations with TSS were weaker overall, with only R50 and its normalized form showing stable negative correlations (r = −0.40 to −0.43, p &lt; 0.01). Longitudinal analyses showed limited correlations between metric changes and clinical score changes. The proposed automated compactness quantification framework enables objective and reliable assessment of wrist JSN on standard radiographs and complements conventional scoring systems by supporting automated and standardized evaluation of RA-related wrist structural changes.</p>
	]]></content:encoded>

	<dc:title>Automated Compactness Quantitative Metrics for Wrist Bone on Conventional Radiography in Rheumatoid Arthritis: A Clinical Evaluation Study</dc:title>
			<dc:creator>Jiajing Zhou</dc:creator>
			<dc:creator>Junmu Peng</dc:creator>
			<dc:creator>Haolin Wang</dc:creator>
			<dc:creator>Hiroshi Kataoka</dc:creator>
			<dc:creator>Masaya Mukai</dc:creator>
			<dc:creator>Tunlada Wiriyanukhroh</dc:creator>
			<dc:creator>Tamotsu Kamishima</dc:creator>
		<dc:identifier>doi: 10.3390/jimaging12020087</dc:identifier>
	<dc:source>Journal of Imaging</dc:source>
	<dc:date>2026-02-18</dc:date>

	<prism:publicationName>Journal of Imaging</prism:publicationName>
	<prism:publicationDate>2026-02-18</prism:publicationDate>
	<prism:volume>12</prism:volume>
	<prism:number>2</prism:number>
	<prism:section>Article</prism:section>
	<prism:startingPage>87</prism:startingPage>
		<prism:doi>10.3390/jimaging12020087</prism:doi>
	<prism:url>https://www.mdpi.com/2313-433X/12/2/87</prism:url>
	
	<cc:license rdf:resource="CC BY 4.0"/>
</item>
        <item rdf:about="https://www.mdpi.com/2313-433X/12/2/85">

	<title>J. Imaging, Vol. 12, Pages 85: SREF: Semantics-Refined Feature Extraction for Long-Term Visual Localization</title>
	<link>https://www.mdpi.com/2313-433X/12/2/85</link>
	<description>Accurate and robust visual localization under changing environments remains a fundamental challenge in autonomous driving and mobile robotics. Traditional handcrafted features often degrade under long-term illumination and viewpoint variations, while recent CNN-based methods, although more robust, typically rely on coarse semantic cues and remain vulnerable to dynamic objects. In this paper, we propose a fine-grained semantics-guided feature extraction framework that adaptively selects stable keypoints while suppressing dynamic disturbances. A fine-grained semantic refinement module subdivides coarse semantic categories into stability-homogeneous sub-classes, and a dual-attention mechanism enhances local repeatability and semantic consistency. By integrating physical priors with self-supervised clustering, the proposed framework learns discriminative and reliable feature representations. Extensive experiments on the Aachen and RobotCar-Seasons benchmarks demonstrate that the proposed approach achieves state-of-the-art accuracy and robustness while maintaining real-time efficiency, effectively bridging coarse semantic guidance with fine-grained stability estimation. Quantitatively, our method achieves strong localization performance on Aachen (up to 88.1% at night under the (0.2°, 0.25 m) threshold) and on RobotCar-Seasons (up to 57.2%/28.4% under the same threshold for day/night), demonstrating improved robustness to seasonal and illumination changes.</description>
	<pubDate>2026-02-18</pubDate>

	<content:encoded><![CDATA[
	<p><b>J. Imaging, Vol. 12, Pages 85: SREF: Semantics-Refined Feature Extraction for Long-Term Visual Localization</b></p>
	<p>Journal of Imaging <a href="https://www.mdpi.com/2313-433X/12/2/85">doi: 10.3390/jimaging12020085</a></p>
	<p>Authors:
		Danfeng Wu
		Kaifeng Zhu
		Heng Shi
		Fenfen Zhou
		Minchi Kuang
		</p>
	<p>Accurate and robust visual localization under changing environments remains a fundamental challenge in autonomous driving and mobile robotics. Traditional handcrafted features often degrade under long-term illumination and viewpoint variations, while recent CNN-based methods, although more robust, typically rely on coarse semantic cues and remain vulnerable to dynamic objects. In this paper, we propose a fine-grained semantics-guided feature extraction framework that adaptively selects stable keypoints while suppressing dynamic disturbances. A fine-grained semantic refinement module subdivides coarse semantic categories into stability-homogeneous sub-classes, and a dual-attention mechanism enhances local repeatability and semantic consistency. By integrating physical priors with self-supervised clustering, the proposed framework learns discriminative and reliable feature representations. Extensive experiments on the Aachen and RobotCar-Seasons benchmarks demonstrate that the proposed approach achieves state-of-the-art accuracy and robustness while maintaining real-time efficiency, effectively bridging coarse semantic guidance with fine-grained stability estimation. Quantitatively, our method achieves strong localization performance on Aachen (up to 88.1% at night under the (0.2°, 0.25 m) threshold) and on RobotCar-Seasons (up to 57.2%/28.4% under the same threshold for day/night), demonstrating improved robustness to seasonal and illumination changes.</p>
	]]></content:encoded>

	<dc:title>SREF: Semantics-Refined Feature Extraction for Long-Term Visual Localization</dc:title>
			<dc:creator>Danfeng Wu</dc:creator>
			<dc:creator>Kaifeng Zhu</dc:creator>
			<dc:creator>Heng Shi</dc:creator>
			<dc:creator>Fenfen Zhou</dc:creator>
			<dc:creator>Minchi Kuang</dc:creator>
		<dc:identifier>doi: 10.3390/jimaging12020085</dc:identifier>
	<dc:source>Journal of Imaging</dc:source>
	<dc:date>2026-02-18</dc:date>

	<prism:publicationName>Journal of Imaging</prism:publicationName>
	<prism:publicationDate>2026-02-18</prism:publicationDate>
	<prism:volume>12</prism:volume>
	<prism:number>2</prism:number>
	<prism:section>Article</prism:section>
	<prism:startingPage>85</prism:startingPage>
		<prism:doi>10.3390/jimaging12020085</prism:doi>
	<prism:url>https://www.mdpi.com/2313-433X/12/2/85</prism:url>
	
	<cc:license rdf:resource="CC BY 4.0"/>
</item>
        <item rdf:about="https://www.mdpi.com/2313-433X/12/2/86">

	<title>J. Imaging, Vol. 12, Pages 86: Print Quality Assessment of QR Code Elements Achieved by the Digital Thermal Transfer Process</title>
	<link>https://www.mdpi.com/2313-433X/12/2/86</link>
	<description>The new European Regulation (EU) 2025/40 includes provisions on modern packaging and packaging waste. It defines the use of image QR codes on packaging (items 71 and 161) and in personal documents, making line barcodes a thing of the past. The definition of a QR code is precisely specified in ISO/IEC 18004:2024. However, their implementation in printing systems is not specified and remains an important factor for their future application. Digital foil printing is a completely new hybrid printing process for applying information to highly precise applications such as QR codes, security printing, and packaging printing. The technique is characterized by a combination of two printing techniques: drop-on-demand UV inkjet followed by thermal transfer of black foil. Using a matte-coated printing substrate (Garda Matt, 300 g/m2), Konica Minolta KM1024 LHE Inkjet head settings, and a transfer temperature of 100 °C, the size of the square printing elements in QR codes plays a decisive role in the quality of the decoded information. The aim of this work is to investigate the possibility of realizing the basic elements of the QR code image (the profile of square elements and the success of realizing a precisely defined surface) with a variation in the thickness of the UV varnish coating (7, 14 and 21 µm), realized using the MGI JETvarnish 3DS digital machine. Square elements with surface areas of 0.01 cm2 (the most commonly used size), 0.06 cm2, 0.25 cm2, 1 cm2, 4 cm2, and 16 cm2 were tested. The results showed that the imprint quality is uneven for the smallest elements (square elements with base lengths of 0.1 cm and 0.25 cm). The effect is especially visible with a minimum UV varnish application of 7 µm (1 drop). By increasing the amount of UV varnish and the application thickness to 14 µm (2 drops) and 21 µm (3 drops), respectively, a significantly more stable, even reproduction of the achromatic image is achieved. The highest technical precision was achieved with a UV varnish thickness of 21 µm.</description>
	<pubDate>2026-02-18</pubDate>

	<content:encoded><![CDATA[
	<p><b>J. Imaging, Vol. 12, Pages 86: Print Quality Assessment of QR Code Elements Achieved by the Digital Thermal Transfer Process</b></p>
	<p>Journal of Imaging <a href="https://www.mdpi.com/2313-433X/12/2/86">doi: 10.3390/jimaging12020086</a></p>
	<p>Authors:
		Igor Majnarić
		Marija Jelkić
		Marko Morić
		Krunoslav Hajdek
		</p>
	<p>The new European Regulation (EU) 2025/40 includes provisions on modern packaging and packaging waste. It defines the use of image QR codes on packaging (items 71 and 161) and in personal documents, making line barcodes a thing of the past. The definition of a QR code is precisely specified in ISO/IEC 18004:2024. However, their implementation in printing systems is not specified and remains an important factor for their future application. Digital foil printing is a completely new hybrid printing process for applying information to highly precise applications such as QR codes, security printing, and packaging printing. The technique is characterized by a combination of two printing techniques: drop-on-demand UV inkjet followed by thermal transfer of black foil. Using a matte-coated printing substrate (Garda Matt, 300 g/m2), Konica Minolta KM1024 LHE Inkjet head settings, and a transfer temperature of 100 °C, the size of the square printing elements in QR codes plays a decisive role in the quality of the decoded information. The aim of this work is to investigate the possibility of realizing the basic elements of the QR code image (the profile of square elements and the success of realizing a precisely defined surface) with a variation in the thickness of the UV varnish coating (7, 14 and 21 µm), realized using the MGI JETvarnish 3DS digital machine. Square elements with surface areas of 0.01 cm2 (the most commonly used size), 0.06 cm2, 0.25 cm2, 1 cm2, 4 cm2, and 16 cm2 were tested. The results showed that the imprint quality is uneven for the smallest elements (square elements with base lengths of 0.1 cm and 0.25 cm). The effect is especially visible with a minimum UV varnish application of 7 µm (1 drop). By increasing the amount of UV varnish and the application thickness to 14 µm (2 drops) and 21 µm (3 drops), respectively, a significantly more stable, even reproduction of the achromatic image is achieved. The highest technical precision was achieved with a UV varnish thickness of 21 µm.</p>
	]]></content:encoded>

	<dc:title>Print Quality Assessment of QR Code Elements Achieved by the Digital Thermal Transfer Process</dc:title>
			<dc:creator>Igor Majnarić</dc:creator>
			<dc:creator>Marija Jelkić</dc:creator>
			<dc:creator>Marko Morić</dc:creator>
			<dc:creator>Krunoslav Hajdek</dc:creator>
		<dc:identifier>doi: 10.3390/jimaging12020086</dc:identifier>
	<dc:source>Journal of Imaging</dc:source>
	<dc:date>2026-02-18</dc:date>

	<prism:publicationName>Journal of Imaging</prism:publicationName>
	<prism:publicationDate>2026-02-18</prism:publicationDate>
	<prism:volume>12</prism:volume>
	<prism:number>2</prism:number>
	<prism:section>Article</prism:section>
	<prism:startingPage>86</prism:startingPage>
		<prism:doi>10.3390/jimaging12020086</prism:doi>
	<prism:url>https://www.mdpi.com/2313-433X/12/2/86</prism:url>
	
	<cc:license rdf:resource="CC BY 4.0"/>
</item>
        <item rdf:about="https://www.mdpi.com/2313-433X/12/2/84">

	<title>J. Imaging, Vol. 12, Pages 84: LEGS: Visual Localization Enhanced by 3D Gaussian Splatting</title>
	<link>https://www.mdpi.com/2313-433X/12/2/84</link>
	<description>Accurate six-degree-of-freedom (6-DoF) visual localization is a fundamental component for modern mapping and navigation. While recent data-centric approaches have leveraged Novel View Synthesis (NVS) to augment training datasets, these methods typically rely on uniform grid-based sampling of virtual cameras. Such naive placement often yields redundant or weakly informative views, failing to effectively bridge the gap between sparse, unordered captures and dense scene geometry. To address these challenges, we present LEGS (Visual Localization Enhanced by 3D Gaussian Splatting), a trajectory-agnostic synthetic-view augmentation framework. LEGS constructs a joint set of 6-DoF camera pose proposals by integrating a coarse 3D lattice with the Structure-from-Motion (SfM) camera graph, followed by a visibility-aware, coverage-driven selection strategy. By utilizing 3D Gaussian Splatting (3DGS), our framework enables high-throughput, scene-specific synthesis within practical computational budgets. Experiments on standard benchmarks and an in-house dataset demonstrate that LEGS consistently improves pose accuracy and robustness, particularly in scenarios characterized by sparse sampling and co-located viewpoints.</description>
	<pubDate>2026-02-16</pubDate>

	<content:encoded><![CDATA[
	<p><b>J. Imaging, Vol. 12, Pages 84: LEGS: Visual Localization Enhanced by 3D Gaussian Splatting</b></p>
	<p>Journal of Imaging <a href="https://www.mdpi.com/2313-433X/12/2/84">doi: 10.3390/jimaging12020084</a></p>
	<p>Authors:
		Daewoon Kim
		I-gil Kim
		</p>
	<p>Accurate six-degree-of-freedom (6-DoF) visual localization is a fundamental component for modern mapping and navigation. While recent data-centric approaches have leveraged Novel View Synthesis (NVS) to augment training datasets, these methods typically rely on uniform grid-based sampling of virtual cameras. Such naive placement often yields redundant or weakly informative views, failing to effectively bridge the gap between sparse, unordered captures and dense scene geometry. To address these challenges, we present LEGS (Visual Localization Enhanced by 3D Gaussian Splatting), a trajectory-agnostic synthetic-view augmentation framework. LEGS constructs a joint set of 6-DoF camera pose proposals by integrating a coarse 3D lattice with the Structure-from-Motion (SfM) camera graph, followed by a visibility-aware, coverage-driven selection strategy. By utilizing 3D Gaussian Splatting (3DGS), our framework enables high-throughput, scene-specific synthesis within practical computational budgets. Experiments on standard benchmarks and an in-house dataset demonstrate that LEGS consistently improves pose accuracy and robustness, particularly in scenarios characterized by sparse sampling and co-located viewpoints.</p>
	]]></content:encoded>

	<dc:title>LEGS: Visual Localization Enhanced by 3D Gaussian Splatting</dc:title>
			<dc:creator>Daewoon Kim</dc:creator>
			<dc:creator>I-gil Kim</dc:creator>
		<dc:identifier>doi: 10.3390/jimaging12020084</dc:identifier>
	<dc:source>Journal of Imaging</dc:source>
	<dc:date>2026-02-16</dc:date>

	<prism:publicationName>Journal of Imaging</prism:publicationName>
	<prism:publicationDate>2026-02-16</prism:publicationDate>
	<prism:volume>12</prism:volume>
	<prism:number>2</prism:number>
	<prism:section>Article</prism:section>
	<prism:startingPage>84</prism:startingPage>
		<prism:doi>10.3390/jimaging12020084</prism:doi>
	<prism:url>https://www.mdpi.com/2313-433X/12/2/84</prism:url>
	
	<cc:license rdf:resource="CC BY 4.0"/>
</item>
        <item rdf:about="https://www.mdpi.com/2313-433X/12/2/83">

	<title>J. Imaging, Vol. 12, Pages 83: 3D Road Defect Mapping via Differentiable Neural Rendering and Multi-Frame Semantic Fusion in Bird&amp;rsquo;s-Eye-View Space</title>
	<link>https://www.mdpi.com/2313-433X/12/2/83</link>
	<description>Road defect detection is essential for traffic safety and infrastructure maintenance. Existing automated methods based on 2D image analysis lack spatial context and cannot provide the accurate 3D localization required for maintenance planning. We propose a novel framework for road defect mapping from monocular video sequences by integrating differentiable Bird’s-Eye-View (BEV) mesh representation, semantic filtering, and multi-frame temporal fusion. Our differentiable mesh-based BEV representation enables efficient scene reconstruction from sparse observations through MLP-based optimization. The semantic filtering strategy leverages road surface segmentation to eliminate off-road false positives, reducing detection errors by 33.7%. Multi-frame fusion with ray-casting projection and exponential moving average update accumulates defect observations across frames while maintaining 3D geometric consistency. Experimental results demonstrate that our framework produces geometrically consistent BEV defect maps with superior accuracy compared to single-frame 2D methods, effectively handling occlusions, motion blur, and varying illumination conditions.</description>
	<pubDate>2026-02-15</pubDate>

	<content:encoded><![CDATA[
	<p><b>J. Imaging, Vol. 12, Pages 83: 3D Road Defect Mapping via Differentiable Neural Rendering and Multi-Frame Semantic Fusion in Bird&amp;rsquo;s-Eye-View Space</b></p>
	<p>Journal of Imaging <a href="https://www.mdpi.com/2313-433X/12/2/83">doi: 10.3390/jimaging12020083</a></p>
	<p>Authors:
		Hongjia Xing
		Feng Yang
		</p>
	<p>Road defect detection is essential for traffic safety and infrastructure maintenance. Existing automated methods based on 2D image analysis lack spatial context and cannot provide the accurate 3D localization required for maintenance planning. We propose a novel framework for road defect mapping from monocular video sequences by integrating differentiable Bird’s-Eye-View (BEV) mesh representation, semantic filtering, and multi-frame temporal fusion. Our differentiable mesh-based BEV representation enables efficient scene reconstruction from sparse observations through MLP-based optimization. The semantic filtering strategy leverages road surface segmentation to eliminate off-road false positives, reducing detection errors by 33.7%. Multi-frame fusion with ray-casting projection and exponential moving average update accumulates defect observations across frames while maintaining 3D geometric consistency. Experimental results demonstrate that our framework produces geometrically consistent BEV defect maps with superior accuracy compared to single-frame 2D methods, effectively handling occlusions, motion blur, and varying illumination conditions.</p>
	]]></content:encoded>

	<dc:title>3D Road Defect Mapping via Differentiable Neural Rendering and Multi-Frame Semantic Fusion in Bird&amp;rsquo;s-Eye-View Space</dc:title>
			<dc:creator>Hongjia Xing</dc:creator>
			<dc:creator>Feng Yang</dc:creator>
		<dc:identifier>doi: 10.3390/jimaging12020083</dc:identifier>
	<dc:source>Journal of Imaging</dc:source>
	<dc:date>2026-02-15</dc:date>

	<prism:publicationName>Journal of Imaging</prism:publicationName>
	<prism:publicationDate>2026-02-15</prism:publicationDate>
	<prism:volume>12</prism:volume>
	<prism:number>2</prism:number>
	<prism:section>Article</prism:section>
	<prism:startingPage>83</prism:startingPage>
		<prism:doi>10.3390/jimaging12020083</prism:doi>
	<prism:url>https://www.mdpi.com/2313-433X/12/2/83</prism:url>
	
	<cc:license rdf:resource="CC BY 4.0"/>
</item>
        <item rdf:about="https://www.mdpi.com/2313-433X/12/2/82">

	<title>J. Imaging, Vol. 12, Pages 82: Research Progress on the Application of Radiomics and Deep Learning in Liver Fibrosis</title>
	<link>https://www.mdpi.com/2313-433X/12/2/82</link>
	<description>Liver fibrosis (LF) represents a crucial intermediate stage in the pathological progression from chronic liver disease to cirrhosis and hepatocellular carcinoma. Early and accurate diagnosis is vital for timely intervention and improved prognosis. Traditional liver biopsy, long regarded as the diagnostic gold standard, has several notable limitations, including invasiveness, sampling errors and inter-observer variability. As artificial intelligence (AI) technology advances rapidly, radiomics and deep learning (DL) have risen to prominence as non-invasive diagnostic tools, showing significant potential in LF diagnostic evaluation. This review summarizes the latest advancements in radiomics and DL for LF diagnosis, staging, prognosis prediction and etiological differentiation. It also analyzes the application value of multimodal imaging modalities, including magnetic resonance imaging (MRI), computed tomography (CT) and ultrasound, in this field. Despite ongoing challenges in model generalization, standardization, interpretability, technological integration and multimodal fusion, the continuous advancement of radiomics and DL technologies holds promise for AI-driven imaging analysis strategies. These approaches aim to integrate multiple clinical monitoring methods, overcome obstacles in early LF diagnosis and treatment, and provide new perspectives for precision medicine of this disease.</description>
	<pubDate>2026-02-15</pubDate>

	<content:encoded><![CDATA[
	<p><b>J. Imaging, Vol. 12, Pages 82: Research Progress on the Application of Radiomics and Deep Learning in Liver Fibrosis</b></p>
	<p>Journal of Imaging <a href="https://www.mdpi.com/2313-433X/12/2/82">doi: 10.3390/jimaging12020082</a></p>
	<p>Authors:
		Yi Dang
		Wenjing Li
		Zhao Liu
		Junqiang Lei
		</p>
	<p>Liver fibrosis (LF) represents a crucial intermediate stage in the pathological progression from chronic liver disease to cirrhosis and hepatocellular carcinoma. Early and accurate diagnosis is vital for timely intervention and improved prognosis. Traditional liver biopsy, long regarded as the diagnostic gold standard, has several notable limitations, including invasiveness, sampling errors and inter-observer variability. As artificial intelligence (AI) technology advances rapidly, radiomics and deep learning (DL) have risen to prominence as non-invasive diagnostic tools, showing significant potential in LF diagnostic evaluation. This review summarizes the latest advancements in radiomics and DL for LF diagnosis, staging, prognosis prediction and etiological differentiation. It also analyzes the application value of multimodal imaging modalities, including magnetic resonance imaging (MRI), computed tomography (CT) and ultrasound, in this field. Despite ongoing challenges in model generalization, standardization, interpretability, technological integration and multimodal fusion, the continuous advancement of radiomics and DL technologies holds promise for AI-driven imaging analysis strategies. These approaches aim to integrate multiple clinical monitoring methods, overcome obstacles in early LF diagnosis and treatment, and provide new perspectives for precision medicine of this disease.</p>
	]]></content:encoded>

	<dc:title>Research Progress on the Application of Radiomics and Deep Learning in Liver Fibrosis</dc:title>
			<dc:creator>Yi Dang</dc:creator>
			<dc:creator>Wenjing Li</dc:creator>
			<dc:creator>Zhao Liu</dc:creator>
			<dc:creator>Junqiang Lei</dc:creator>
		<dc:identifier>doi: 10.3390/jimaging12020082</dc:identifier>
	<dc:source>Journal of Imaging</dc:source>
	<dc:date>2026-02-15</dc:date>

	<prism:publicationName>Journal of Imaging</prism:publicationName>
	<prism:publicationDate>2026-02-15</prism:publicationDate>
	<prism:volume>12</prism:volume>
	<prism:number>2</prism:number>
	<prism:section>Review</prism:section>
	<prism:startingPage>82</prism:startingPage>
		<prism:doi>10.3390/jimaging12020082</prism:doi>
	<prism:url>https://www.mdpi.com/2313-433X/12/2/82</prism:url>
	
	<cc:license rdf:resource="CC BY 4.0"/>
</item>
        <item rdf:about="https://www.mdpi.com/2313-433X/12/2/81">

	<title>J. Imaging, Vol. 12, Pages 81: Automatic Childhood Pneumonia Diagnosis Based on Multi-Model Feature Fusion Using Chi-Square Feature Selection</title>
	<link>https://www.mdpi.com/2313-433X/12/2/81</link>
	<description>Pneumonia is one of the main causes of child mortality, and chest radiography (CXR) is essential for its diagnosis. However, the low radiation exposure used in pediatric imaging complicates the accurate detection of pneumonia, making traditional examination ineffective. Progress in medical imaging with convolutional neural networks (CNNs) has considerably improved performance, gaining widespread recognition for its effectiveness. This paper proposes an accurate pneumonia detection method based on different deep CNN architectures combined with optimal feature fusion. Enhanced VGG-19, ResNet-50, and MobileNet-V2 models are trained on the most widely used pneumonia dataset, applying appropriate transfer learning and fine-tuning strategies. To create an effective feature input, the Chi-Square technique removes irrelevant features from every enhanced CNN. The resulting subsets are then fused horizontally to generate a more diverse and robust feature representation for binary classification. By combining the 1000 best features from the VGG-19 and MobileNet-V2 models, the proposed approach achieves the best accuracy (97.59%), Recall (98.33%), and F1-score (98.19%) on the test set with a supervised support vector machine (SVM) classifier. The results demonstrate that our approach provides a significant performance improvement over previous studies using various ensemble fusion techniques while ensuring computational efficiency. We expect this fused-feature system to significantly aid the timely detection of childhood pneumonia, especially within constrained healthcare systems.</description>
	<pubDate>2026-02-14</pubDate>

	<content:encoded><![CDATA[
	<p><b>J. Imaging, Vol. 12, Pages 81: Automatic Childhood Pneumonia Diagnosis Based on Multi-Model Feature Fusion Using Chi-Square Feature Selection</b></p>
	<p>Journal of Imaging <a href="https://www.mdpi.com/2313-433X/12/2/81">doi: 10.3390/jimaging12020081</a></p>
	<p>Authors:
		Amira Ouerhani
		Tareq Hadidi
		Hanene Sahli
		Halima Mahjoubi
		</p>
	<p>Pneumonia is one of the main causes of child mortality, and chest radiography (CXR) is essential for its diagnosis. However, the low radiation exposure used in pediatric imaging complicates the accurate detection of pneumonia, making traditional examination ineffective. Progress in medical imaging with convolutional neural networks (CNNs) has considerably improved performance, gaining widespread recognition for its effectiveness. This paper proposes an accurate pneumonia detection method based on different deep CNN architectures combined with optimal feature fusion. Enhanced VGG-19, ResNet-50, and MobileNet-V2 models are trained on the most widely used pneumonia dataset, applying appropriate transfer learning and fine-tuning strategies. To create an effective feature input, the Chi-Square technique removes irrelevant features from every enhanced CNN. The resulting subsets are then fused horizontally to generate a more diverse and robust feature representation for binary classification. By combining the 1000 best features from the VGG-19 and MobileNet-V2 models, the proposed approach achieves the best accuracy (97.59%), Recall (98.33%), and F1-score (98.19%) on the test set with a supervised support vector machine (SVM) classifier. The results demonstrate that our approach provides a significant performance improvement over previous studies using various ensemble fusion techniques while ensuring computational efficiency. We expect this fused-feature system to significantly aid the timely detection of childhood pneumonia, especially within constrained healthcare systems.</p>
	]]></content:encoded>

	<dc:title>Automatic Childhood Pneumonia Diagnosis Based on Multi-Model Feature Fusion Using Chi-Square Feature Selection</dc:title>
			<dc:creator>Amira Ouerhani</dc:creator>
			<dc:creator>Tareq Hadidi</dc:creator>
			<dc:creator>Hanene Sahli</dc:creator>
			<dc:creator>Halima Mahjoubi</dc:creator>
		<dc:identifier>doi: 10.3390/jimaging12020081</dc:identifier>
	<dc:source>Journal of Imaging</dc:source>
	<dc:date>2026-02-14</dc:date>

	<prism:publicationName>Journal of Imaging</prism:publicationName>
	<prism:publicationDate>2026-02-14</prism:publicationDate>
	<prism:volume>12</prism:volume>
	<prism:number>2</prism:number>
	<prism:section>Article</prism:section>
	<prism:startingPage>81</prism:startingPage>
		<prism:doi>10.3390/jimaging12020081</prism:doi>
	<prism:url>https://www.mdpi.com/2313-433X/12/2/81</prism:url>
	
	<cc:license rdf:resource="CC BY 4.0"/>
</item>
        <item rdf:about="https://www.mdpi.com/2313-433X/12/2/80">

	<title>J. Imaging, Vol. 12, Pages 80: Confidence-Guided Adaptive Diffusion Network for Medical Image Classification</title>
	<link>https://www.mdpi.com/2313-433X/12/2/80</link>
	<description>Medical image classification is a fundamental task in medical image analysis and underpins a wide range of clinical applications, including dermatological screening, retinal disease assessment, and malignant tissue detection. In recent years, diffusion models have demonstrated promising potential for medical image classification owing to their strong representation learning capability. However, existing diffusion-based classification methods often rely on oversimplified prior modeling strategies, which fail to adequately capture the intrinsic multi-scale semantic information and contextual dependencies inherent in medical images. As a result, the discriminative power and stability of feature representations are constrained in complex scenarios. In addition, fixed noise injection strategies neglect variations in sample-level prediction confidence, leading to uniform perturbations being imposed on samples with different levels of semantic reliability during the diffusion process, which in turn limits the model’s discriminative performance and generalization ability. To address these challenges, this paper proposes a Confidence-Guided Adaptive Diffusion Network (CGAD-Net) for medical image classification. Specifically, a hybrid prior modeling framework is introduced, consisting of a Hierarchical Pyramid Context Modeling (HPCM) module and an Intra-Scale Dilated Convolution Refinement (IDCR) module. These two components jointly enable the diffusion-based feature modeling process to effectively capture fine-grained structural details and global contextual semantic information. Furthermore, a Confidence-Guided Adaptive Noise Injection (CG-ANI) strategy is designed to dynamically regulate noise intensity during the diffusion process according to sample-level prediction confidence. Without altering the underlying discriminative objective, CG-ANI stabilizes model training and enhances robust representation learning for semantically ambiguous samples. Experimental results on multiple public medical image classification benchmarks, including HAM10000, APTOS2019, and Chaoyang, demonstrate that CGAD-Net achieves competitive performance in terms of classification accuracy, robustness, and training stability. These results validate the effectiveness and application potential of confidence-guided diffusion modeling for two-dimensional medical image classification tasks, and provide valuable insights for further research on diffusion models in the field of medical image analysis.</description>
	<pubDate>2026-02-14</pubDate>

	<content:encoded><![CDATA[
	<p><b>J. Imaging, Vol. 12, Pages 80: Confidence-Guided Adaptive Diffusion Network for Medical Image Classification</b></p>
	<p>Journal of Imaging <a href="https://www.mdpi.com/2313-433X/12/2/80">doi: 10.3390/jimaging12020080</a></p>
	<p>Authors:
		Yang Yan
		Zhuo Xie
		Wenbo Huang
		</p>
	<p>Medical image classification is a fundamental task in medical image analysis and underpins a wide range of clinical applications, including dermatological screening, retinal disease assessment, and malignant tissue detection. In recent years, diffusion models have demonstrated promising potential for medical image classification owing to their strong representation learning capability. However, existing diffusion-based classification methods often rely on oversimplified prior modeling strategies, which fail to adequately capture the intrinsic multi-scale semantic information and contextual dependencies inherent in medical images. As a result, the discriminative power and stability of feature representations are constrained in complex scenarios. In addition, fixed noise injection strategies neglect variations in sample-level prediction confidence, leading to uniform perturbations being imposed on samples with different levels of semantic reliability during the diffusion process, which in turn limits the model’s discriminative performance and generalization ability. To address these challenges, this paper proposes a Confidence-Guided Adaptive Diffusion Network (CGAD-Net) for medical image classification. Specifically, a hybrid prior modeling framework is introduced, consisting of a Hierarchical Pyramid Context Modeling (HPCM) module and an Intra-Scale Dilated Convolution Refinement (IDCR) module. These two components jointly enable the diffusion-based feature modeling process to effectively capture fine-grained structural details and global contextual semantic information. Furthermore, a Confidence-Guided Adaptive Noise Injection (CG-ANI) strategy is designed to dynamically regulate noise intensity during the diffusion process according to sample-level prediction confidence. Without altering the underlying discriminative objective, CG-ANI stabilizes model training and enhances robust representation learning for semantically ambiguous samples. Experimental results on multiple public medical image classification benchmarks, including HAM10000, APTOS2019, and Chaoyang, demonstrate that CGAD-Net achieves competitive performance in terms of classification accuracy, robustness, and training stability. These results validate the effectiveness and application potential of confidence-guided diffusion modeling for two-dimensional medical image classification tasks, and provide valuable insights for further research on diffusion models in the field of medical image analysis.</p>
	]]></content:encoded>

	<dc:title>Confidence-Guided Adaptive Diffusion Network for Medical Image Classification</dc:title>
			<dc:creator>Yang Yan</dc:creator>
			<dc:creator>Zhuo Xie</dc:creator>
			<dc:creator>Wenbo Huang</dc:creator>
		<dc:identifier>doi: 10.3390/jimaging12020080</dc:identifier>
	<dc:source>Journal of Imaging</dc:source>
	<dc:date>2026-02-14</dc:date>

	<prism:publicationName>Journal of Imaging</prism:publicationName>
	<prism:publicationDate>2026-02-14</prism:publicationDate>
	<prism:volume>12</prism:volume>
	<prism:number>2</prism:number>
	<prism:section>Article</prism:section>
	<prism:startingPage>80</prism:startingPage>
		<prism:doi>10.3390/jimaging12020080</prism:doi>
	<prism:url>https://www.mdpi.com/2313-433X/12/2/80</prism:url>
	
	<cc:license rdf:resource="CC BY 4.0"/>
</item>
        <item rdf:about="https://www.mdpi.com/2313-433X/12/2/79">

	<title>J. Imaging, Vol. 12, Pages 79: Progressive Upsampling Generative Adversarial Network with Collaborative Attention for Single-Image Super-Resolution</title>
	<link>https://www.mdpi.com/2313-433X/12/2/79</link>
	<description>Single-image super-resolution (SISR) is an essential low-level visual task that aims to produce high-resolution images from low-resolution inputs. However, most existing SISR methods heavily rely on ideal degradation kernels and rarely consider the actual noise distribution. To tackle these issues, this paper presents a progressive upsampling generative adversarial network with a collaborative attention mechanism, called PUGAN. Specifically, residual multiscale blocks (RMBs) based on stacked mixed-pooling multiscale structures (MPMSs) are designed to make full use of multiscale global–local hierarchical features, and the frequency collaborative attention mechanism (CAM) is used to fully exploit high- and low-frequency characteristics. Meanwhile, we design a progressive upsampling strategy to better guide the model’s learning while reducing the model’s complexity. Finally, the discriminator is also used to evaluate the reconstructed high-resolution images for balancing super-resolution reconstruction and detail enhancement. Our PUGAN yields comparable PSNR/SSIM/LPIPS values on the NTIRE 2020, Urban 100, and B100 datasets, namely 33.987/0.9673/0.1210, 32.966/0.9483/0.1431, and 33.627/0.9546/0.1354 for the scale factor of ×2, as well as 26.349/0.8721/0.1975, 26.110/0.8614/0.1983, and 26.306/0.8803/0.1978 for the scale factor of ×4, respectively. Extensive experiments demonstrate that our PUGAN outperforms state-of-the-art SISR methods in qualitative and quantitative assessments for the SISR task. Additionally, our PUGAN shows potential benefits for pathological image super-resolution.</description>
	<pubDate>2026-02-11</pubDate>

	<content:encoded><![CDATA[
	<p><b>J. Imaging, Vol. 12, Pages 79: Progressive Upsampling Generative Adversarial Network with Collaborative Attention for Single-Image Super-Resolution</b></p>
	<p>Journal of Imaging <a href="https://www.mdpi.com/2313-433X/12/2/79">doi: 10.3390/jimaging12020079</a></p>
	<p>Authors:
		Haoxiang Lu
		Jing Zhang
		Mengyuan Jing
		Ziming Wang
		Wenhao Wang
		</p>
	<p>Single-image super-resolution (SISR) is an essential low-level visual task that aims to produce high-resolution images from low-resolution inputs. However, most existing SISR methods heavily rely on ideal degradation kernels and rarely consider the actual noise distribution. To tackle these issues, this paper presents a progressive upsampling generative adversarial network with a collaborative attention mechanism, called PUGAN. Specifically, residual multiscale blocks (RMBs) based on stacked mixed-pooling multiscale structures (MPMSs) are designed to make full use of multiscale global–local hierarchical features, and the frequency collaborative attention mechanism (CAM) is used to fully exploit high- and low-frequency characteristics. Meanwhile, we design a progressive upsampling strategy to better guide the model’s learning while reducing the model’s complexity. Finally, the discriminator is also used to evaluate the reconstructed high-resolution images for balancing super-resolution reconstruction and detail enhancement. Our PUGAN yields comparable PSNR/SSIM/LPIPS values on the NTIRE 2020, Urban 100, and B100 datasets, namely 33.987/0.9673/0.1210, 32.966/0.9483/0.1431, and 33.627/0.9546/0.1354 for the scale factor of ×2, as well as 26.349/0.8721/0.1975, 26.110/0.8614/0.1983, and 26.306/0.8803/0.1978 for the scale factor of ×4, respectively. Extensive experiments demonstrate that our PUGAN outperforms state-of-the-art SISR methods in qualitative and quantitative assessments for the SISR task. Additionally, our PUGAN shows potential benefits for pathological image super-resolution.</p>
	]]></content:encoded>

	<dc:title>Progressive Upsampling Generative Adversarial Network with Collaborative Attention for Single-Image Super-Resolution</dc:title>
			<dc:creator>Haoxiang Lu</dc:creator>
			<dc:creator>Jing Zhang</dc:creator>
			<dc:creator>Mengyuan Jing</dc:creator>
			<dc:creator>Ziming Wang</dc:creator>
			<dc:creator>Wenhao Wang</dc:creator>
		<dc:identifier>doi: 10.3390/jimaging12020079</dc:identifier>
	<dc:source>Journal of Imaging</dc:source>
	<dc:date>2026-02-11</dc:date>

	<prism:publicationName>Journal of Imaging</prism:publicationName>
	<prism:publicationDate>2026-02-11</prism:publicationDate>
	<prism:volume>12</prism:volume>
	<prism:number>2</prism:number>
	<prism:section>Article</prism:section>
	<prism:startingPage>79</prism:startingPage>
		<prism:doi>10.3390/jimaging12020079</prism:doi>
	<prism:url>https://www.mdpi.com/2313-433X/12/2/79</prism:url>
	
	<cc:license rdf:resource="CC BY 4.0"/>
</item>
        <item rdf:about="https://www.mdpi.com/2313-433X/12/2/78">

	<title>J. Imaging, Vol. 12, Pages 78: Age Prediction of Hematoma from Hyperspectral Images Using Convolutional Neural Networks</title>
	<link>https://www.mdpi.com/2313-433X/12/2/78</link>
	<description>Accurate estimation of hematoma age remains a major challenge in forensic practice, as current assessments rely heavily on subjective visual interpretation. Hyperspectral imaging (HSI) captures rich spectral signatures that may reflect the biochemical evolution of hematomas over time. This study evaluates whether a convolutional neural network (CNN) integrating both spectral and spatial information improves hematoma age estimation accuracy. Additionally, we investigate whether performance can be maintained using a reduced, physiologically motivated subset of wavelengths. Using a dataset of forearm hematomas from 25 participants, we applied radiometric normalization and SAM-based segmentation to extract 64×64×204 hyperspectral patches. In leave-one-subject-out cross-validation, the CNN outperformed a spectral-only Lasso baseline, reducing the mean absolute error (MAE) from 3.24 days to 2.29 days. Band-importance analysis combining SmoothGrad and occlusion sensitivity identified 20 highly informative wavelengths; using only these bands matched or exceeded the accuracy of the full 204-band model across early, middle, and late hematoma stages. These results demonstrate that spectral–spatial modeling and physiologically grounded band selection can enhance estimation accuracy while significantly reducing data dimensionality. This approach supports the development of compact multispectral systems for objective clinical and forensic evaluation.</description>
	<pubDate>2026-02-11</pubDate>

	<content:encoded><![CDATA[
	<p><b>J. Imaging, Vol. 12, Pages 78: Age Prediction of Hematoma from Hyperspectral Images Using Convolutional Neural Networks</b></p>
	<p>Journal of Imaging <a href="https://www.mdpi.com/2313-433X/12/2/78">doi: 10.3390/jimaging12020078</a></p>
	<p>Authors:
		Arash Keshavarz
		Gerald Bieber
		Daniel Wulff
		Carsten Babian
		Stefan Lüdtke
		</p>
	<p>Accurate estimation of hematoma age remains a major challenge in forensic practice, as current assessments rely heavily on subjective visual interpretation. Hyperspectral imaging (HSI) captures rich spectral signatures that may reflect the biochemical evolution of hematomas over time. This study evaluates whether a convolutional neural network (CNN) integrating both spectral and spatial information improves hematoma age estimation accuracy. Additionally, we investigate whether performance can be maintained using a reduced, physiologically motivated subset of wavelengths. Using a dataset of forearm hematomas from 25 participants, we applied radiometric normalization and SAM-based segmentation to extract 64×64×204 hyperspectral patches. In leave-one-subject-out cross-validation, the CNN outperformed a spectral-only Lasso baseline, reducing the mean absolute error (MAE) from 3.24 days to 2.29 days. Band-importance analysis combining SmoothGrad and occlusion sensitivity identified 20 highly informative wavelengths; using only these bands matched or exceeded the accuracy of the full 204-band model across early, middle, and late hematoma stages. These results demonstrate that spectral–spatial modeling and physiologically grounded band selection can enhance estimation accuracy while significantly reducing data dimensionality. This approach supports the development of compact multispectral systems for objective clinical and forensic evaluation.</p>
	]]></content:encoded>

	<dc:title>Age Prediction of Hematoma from Hyperspectral Images Using Convolutional Neural Networks</dc:title>
			<dc:creator>Arash Keshavarz</dc:creator>
			<dc:creator>Gerald Bieber</dc:creator>
			<dc:creator>Daniel Wulff</dc:creator>
			<dc:creator>Carsten Babian</dc:creator>
			<dc:creator>Stefan Lüdtke</dc:creator>
		<dc:identifier>doi: 10.3390/jimaging12020078</dc:identifier>
	<dc:source>Journal of Imaging</dc:source>
	<dc:date>2026-02-11</dc:date>

	<prism:publicationName>Journal of Imaging</prism:publicationName>
	<prism:publicationDate>2026-02-11</prism:publicationDate>
	<prism:volume>12</prism:volume>
	<prism:number>2</prism:number>
	<prism:section>Article</prism:section>
	<prism:startingPage>78</prism:startingPage>
		<prism:doi>10.3390/jimaging12020078</prism:doi>
	<prism:url>https://www.mdpi.com/2313-433X/12/2/78</prism:url>
	
	<cc:license rdf:resource="CC BY 4.0"/>
</item>
        <item rdf:about="https://www.mdpi.com/2313-433X/12/2/77">

	<title>J. Imaging, Vol. 12, Pages 77: Correction: Jiang et al. Double-Gated Mamba Multi-Scale Adaptive Feature Learning Network for Unsupervised Single RGB Image Hyperspectral Image Reconstruction. J. Imaging 2026, 12, 19</title>
	<link>https://www.mdpi.com/2313-433X/12/2/77</link>
	<description>There were two errors in the original publication [...]</description>
	<pubDate>2026-02-11</pubDate>

	<content:encoded><![CDATA[
	<p><b>J. Imaging, Vol. 12, Pages 77: Correction: Jiang et al. Double-Gated Mamba Multi-Scale Adaptive Feature Learning Network for Unsupervised Single RGB Image Hyperspectral Image Reconstruction. J. Imaging 2026, 12, 19</b></p>
	<p>Journal of Imaging <a href="https://www.mdpi.com/2313-433X/12/2/77">doi: 10.3390/jimaging12020077</a></p>
	<p>Authors:
		Zhongmin Jiang
		Zhen Wang
		Wenju Wang
		Jifan Zhu
		</p>
	<p>There were two errors in the original publication [...]</p>
	]]></content:encoded>

	<dc:title>Correction: Jiang et al. Double-Gated Mamba Multi-Scale Adaptive Feature Learning Network for Unsupervised Single RGB Image Hyperspectral Image Reconstruction. J. Imaging 2026, 12, 19</dc:title>
			<dc:creator>Zhongmin Jiang</dc:creator>
			<dc:creator>Zhen Wang</dc:creator>
			<dc:creator>Wenju Wang</dc:creator>
			<dc:creator>Jifan Zhu</dc:creator>
		<dc:identifier>doi: 10.3390/jimaging12020077</dc:identifier>
	<dc:source>Journal of Imaging</dc:source>
	<dc:date>2026-02-11</dc:date>

	<prism:publicationName>Journal of Imaging</prism:publicationName>
	<prism:publicationDate>2026-02-11</prism:publicationDate>
	<prism:volume>12</prism:volume>
	<prism:number>2</prism:number>
	<prism:section>Correction</prism:section>
	<prism:startingPage>77</prism:startingPage>
		<prism:doi>10.3390/jimaging12020077</prism:doi>
	<prism:url>https://www.mdpi.com/2313-433X/12/2/77</prism:url>
	
	<cc:license rdf:resource="CC BY 4.0"/>
</item>
        <item rdf:about="https://www.mdpi.com/2313-433X/12/2/76">

	<title>J. Imaging, Vol. 12, Pages 76: A Multiphase CT-Based Integrated Deep Learning Framework for Rectal Cancer Detection, Segmentation, and Staging: Performance Comparison with Radiologist Assessment</title>
	<link>https://www.mdpi.com/2313-433X/12/2/76</link>
	<description>Accurate staging of rectal cancer is crucial for treatment planning; however, computed tomography (CT) interpretation remains challenging and highly dependent on radiologist expertise. This study aimed to develop and evaluate an AI-assisted system for rectal cancer detection and staging using CT images. The proposed framework integrates three components: a convolutional neural network (RCD-CNN) for lesion detection, a U-Net model for rectal contour delineation and tumor localization, and a 3D convolutional network (RCS-3DCNN) for staging prediction. CT scans from 223 rectal cancer patients at Kaohsiung Medical University Chung-Ho Memorial Hospital were retrospectively analyzed, including both non-contrast and contrast-enhanced studies. RCD-CNN achieved an accuracy of 0.976, recall of 0.975, and precision of 0.976. U-Net yielded Dice scores of 0.897 (rectal contours) and 0.856 (tumor localization). Radiologist-based clinical staging had 82.6% concordance with pathology, while AI-based staging achieved 80.4%. McNemar’s test showed no significant difference between the AI and radiologist staging results (p = 1.0). The proposed AI-assisted system achieved staging accuracy comparable to that of radiologists and demonstrated feasibility as a decision-support tool in rectal cancer management. This study introduces a novel three-stage, dual-phase CT-based AI framework that integrates lesion detection, segmentation, and staging within a unified workflow.</description>
	<pubDate>2026-02-10</pubDate>

	<content:encoded><![CDATA[
	<p><b>J. Imaging, Vol. 12, Pages 76: A Multiphase CT-Based Integrated Deep Learning Framework for Rectal Cancer Detection, Segmentation, and Staging: Performance Comparison with Radiologist Assessment</b></p>
	<p>Journal of Imaging <a href="https://www.mdpi.com/2313-433X/12/2/76">doi: 10.3390/jimaging12020076</a></p>
	<p>Authors:
		Tzu-Hsueh Tsai
		Jia-Hui Lin
		Yen-Te Liu
		Jhing-Fa Wang
		Chien-Hung Lee
		Chiao-Yun Chen
		</p>
	<p>Accurate staging of rectal cancer is crucial for treatment planning; however, computed tomography (CT) interpretation remains challenging and highly dependent on radiologist expertise. This study aimed to develop and evaluate an AI-assisted system for rectal cancer detection and staging using CT images. The proposed framework integrates three components: a convolutional neural network (RCD-CNN) for lesion detection, a U-Net model for rectal contour delineation and tumor localization, and a 3D convolutional network (RCS-3DCNN) for staging prediction. CT scans from 223 rectal cancer patients at Kaohsiung Medical University Chung-Ho Memorial Hospital were retrospectively analyzed, including both non-contrast and contrast-enhanced studies. RCD-CNN achieved an accuracy of 0.976, recall of 0.975, and precision of 0.976. U-Net yielded Dice scores of 0.897 (rectal contours) and 0.856 (tumor localization). Radiologist-based clinical staging had 82.6% concordance with pathology, while AI-based staging achieved 80.4%. McNemar’s test showed no significant difference between the AI and radiologist staging results (p = 1.0). The proposed AI-assisted system achieved staging accuracy comparable to that of radiologists and demonstrated feasibility as a decision-support tool in rectal cancer management. This study introduces a novel three-stage, dual-phase CT-based AI framework that integrates lesion detection, segmentation, and staging within a unified workflow.</p>
	]]></content:encoded>

	<dc:title>A Multiphase CT-Based Integrated Deep Learning Framework for Rectal Cancer Detection, Segmentation, and Staging: Performance Comparison with Radiologist Assessment</dc:title>
			<dc:creator>Tzu-Hsueh Tsai</dc:creator>
			<dc:creator>Jia-Hui Lin</dc:creator>
			<dc:creator>Yen-Te Liu</dc:creator>
			<dc:creator>Jhing-Fa Wang</dc:creator>
			<dc:creator>Chien-Hung Lee</dc:creator>
			<dc:creator>Chiao-Yun Chen</dc:creator>
		<dc:identifier>doi: 10.3390/jimaging12020076</dc:identifier>
	<dc:source>Journal of Imaging</dc:source>
	<dc:date>2026-02-10</dc:date>

	<prism:publicationName>Journal of Imaging</prism:publicationName>
	<prism:publicationDate>2026-02-10</prism:publicationDate>
	<prism:volume>12</prism:volume>
	<prism:number>2</prism:number>
	<prism:section>Article</prism:section>
	<prism:startingPage>76</prism:startingPage>
		<prism:doi>10.3390/jimaging12020076</prism:doi>
	<prism:url>https://www.mdpi.com/2313-433X/12/2/76</prism:url>
	
	<cc:license rdf:resource="CC BY 4.0"/>
</item>
        <item rdf:about="https://www.mdpi.com/2313-433X/12/2/75">

	<title>J. Imaging, Vol. 12, Pages 75: Robust Detection and Localization of Image Copy-Move Forgery Using Multi-Feature Fusion</title>
	<link>https://www.mdpi.com/2313-433X/12/2/75</link>
	<description>Copy-move forgery detection (CMFD) is a crucial image forensics analysis technique. The rapid development of deep learning algorithms has led to impressive advancements in CMFD. However, existing models suffer from two key limitations. First, their feature fusion modules insufficiently exploit the complementary nature of features from the RGB domain and noise domain, resulting in suboptimal feature representations. Second, during decoding, they simply classify pixels as authentic or forged, without aggregating cross-layer information or integrating local and global attention mechanisms, leading to unsatisfactory detection precision. To overcome these limitations, a robust detection and localization approach to image copy-move forgery using multi-feature fusion is proposed. Firstly, a Multi-Feature Fusion Network (MFFNet) was designed. Within its feature fusion module, features from both the RGB domain and noise domain were fused to enable mutual complementarity between distinct characteristics, yielding richer feature information. Then, a Lightweight Multi-layer Perceptron Decoder (LMPD) was developed for image reconstruction and forgery localization map generation. Finally, by aggregating information from different layers and combining local and global attention mechanisms, more accurate prediction masks were obtained. The experimental results demonstrate that the proposed MFFNet model exhibits enhanced robustness and superior detection and localization performance compared to existing methods when faced with JPEG compression, noise addition, and resizing operations.</description>
	<pubDate>2026-02-10</pubDate>

	<content:encoded><![CDATA[
	<p><b>J. Imaging, Vol. 12, Pages 75: Robust Detection and Localization of Image Copy-Move Forgery Using Multi-Feature Fusion</b></p>
	<p>Journal of Imaging <a href="https://www.mdpi.com/2313-433X/12/2/75">doi: 10.3390/jimaging12020075</a></p>
	<p>Authors:
		Kaiqi Lu
		Qiuyu Zhang
		</p>
	<p>Copy-move forgery detection (CMFD) is a crucial image forensics analysis technique. The rapid development of deep learning algorithms has led to impressive advancements in CMFD. However, existing models suffer from two key limitations. First, their feature fusion modules insufficiently exploit the complementary nature of features from the RGB domain and noise domain, resulting in suboptimal feature representations. Second, during decoding, they simply classify pixels as authentic or forged, without aggregating cross-layer information or integrating local and global attention mechanisms, leading to unsatisfactory detection precision. To overcome these limitations, a robust detection and localization approach to image copy-move forgery using multi-feature fusion is proposed. Firstly, a Multi-Feature Fusion Network (MFFNet) was designed. Within its feature fusion module, features from both the RGB domain and noise domain were fused to enable mutual complementarity between distinct characteristics, yielding richer feature information. Then, a Lightweight Multi-layer Perceptron Decoder (LMPD) was developed for image reconstruction and forgery localization map generation. Finally, by aggregating information from different layers and combining local and global attention mechanisms, more accurate prediction masks were obtained. The experimental results demonstrate that the proposed MFFNet model exhibits enhanced robustness and superior detection and localization performance compared to existing methods when faced with JPEG compression, noise addition, and resizing operations.</p>
	]]></content:encoded>

	<dc:title>Robust Detection and Localization of Image Copy-Move Forgery Using Multi-Feature Fusion</dc:title>
			<dc:creator>Kaiqi Lu</dc:creator>
			<dc:creator>Qiuyu Zhang</dc:creator>
		<dc:identifier>doi: 10.3390/jimaging12020075</dc:identifier>
	<dc:source>Journal of Imaging</dc:source>
	<dc:date>2026-02-10</dc:date>

	<prism:publicationName>Journal of Imaging</prism:publicationName>
	<prism:publicationDate>2026-02-10</prism:publicationDate>
	<prism:volume>12</prism:volume>
	<prism:number>2</prism:number>
	<prism:section>Article</prism:section>
	<prism:startingPage>75</prism:startingPage>
		<prism:doi>10.3390/jimaging12020075</prism:doi>
	<prism:url>https://www.mdpi.com/2313-433X/12/2/75</prism:url>
	
	<cc:license rdf:resource="CC BY 4.0"/>
</item>
        <item rdf:about="https://www.mdpi.com/2313-433X/12/2/74">

	<title>J. Imaging, Vol. 12, Pages 74: LDFSAM: Localization Distillation-Enhanced Feature Prompting SAM for Medical Image Segmentation</title>
	<link>https://www.mdpi.com/2313-433X/12/2/74</link>
	<description>Standard SAM-based approaches in medical imaging typically rely on explicit geometric prompts, such as bounding boxes or points. However, these rigid spatial constraints are often insufficient for capturing the complex, deformable boundaries of medical structures, where localization noise easily propagates into segmentation errors. To overcome this, we propose the Localization Distillation-Enhanced Feature Prompting SAM (LDFSAM), a novel framework that shifts from discrete coordinate inputs to a latent feature prompting paradigm. We employ a lightweight prompt generator, refined via Localization Distillation (LD), to inject multi-scale features into the SAM decoder as complementary Dense Feature Prompts (DFPs) and Sparse Feature Prompts (SFPs). This effectively guides segmentation without explicit box constraints. Extensive experiments on four public benchmarks (3D CBCT Tooth, ISIC 2018, MMOTU, and Kvasir-SEG) demonstrate that LDFSAM outperforms both prior SAM-based baselines and conventional networks, achieving Dice scores exceeding 0.91. Further validation on an in-house cohort demonstrates its robust generalization capabilities. Overall, our method outperforms both prior SAM-based baselines and conventional networks, with particularly strong gains in low-data regimes, providing a reliable solution for automated medical image segmentation.</description>
	<pubDate>2026-02-10</pubDate>

	<content:encoded><![CDATA[
	<p><b>J. Imaging, Vol. 12, Pages 74: LDFSAM: Localization Distillation-Enhanced Feature Prompting SAM for Medical Image Segmentation</b></p>
	<p>Journal of Imaging <a href="https://www.mdpi.com/2313-433X/12/2/74">doi: 10.3390/jimaging12020074</a></p>
	<p>Authors:
		Xuanbo Zhao
		Cheng Wang
		Huaxing Xu
		Hong Zhou
		Zekuan Yu
		Tao Chen
		Xiaoling Wei
		Rongjun Zhang
		</p>
	<p>Standard SAM-based approaches in medical imaging typically rely on explicit geometric prompts, such as bounding boxes or points. However, these rigid spatial constraints are often insufficient for capturing the complex, deformable boundaries of medical structures, where localization noise easily propagates into segmentation errors. To overcome this, we propose the Localization Distillation-Enhanced Feature Prompting SAM (LDFSAM), a novel framework that shifts from discrete coordinate inputs to a latent feature prompting paradigm. We employ a lightweight prompt generator, refined via Localization Distillation (LD), to inject multi-scale features into the SAM decoder as complementary Dense Feature Prompts (DFPs) and Sparse Feature Prompts (SFPs). This effectively guides segmentation without explicit box constraints. Extensive experiments on four public benchmarks (3D CBCT Tooth, ISIC 2018, MMOTU, and Kvasir-SEG) demonstrate that LDFSAM outperforms both prior SAM-based baselines and conventional networks, achieving Dice scores exceeding 0.91. Further validation on an in-house cohort demonstrates its robust generalization capabilities. Overall, our method outperforms both prior SAM-based baselines and conventional networks, with particularly strong gains in low-data regimes, providing a reliable solution for automated medical image segmentation.</p>
	]]></content:encoded>

	<dc:title>LDFSAM: Localization Distillation-Enhanced Feature Prompting SAM for Medical Image Segmentation</dc:title>
			<dc:creator>Xuanbo Zhao</dc:creator>
			<dc:creator>Cheng Wang</dc:creator>
			<dc:creator>Huaxing Xu</dc:creator>
			<dc:creator>Hong Zhou</dc:creator>
			<dc:creator>Zekuan Yu</dc:creator>
			<dc:creator>Tao Chen</dc:creator>
			<dc:creator>Xiaoling Wei</dc:creator>
			<dc:creator>Rongjun Zhang</dc:creator>
		<dc:identifier>doi: 10.3390/jimaging12020074</dc:identifier>
	<dc:source>Journal of Imaging</dc:source>
	<dc:date>2026-02-10</dc:date>

	<prism:publicationName>Journal of Imaging</prism:publicationName>
	<prism:publicationDate>2026-02-10</prism:publicationDate>
	<prism:volume>12</prism:volume>
	<prism:number>2</prism:number>
	<prism:section>Article</prism:section>
	<prism:startingPage>74</prism:startingPage>
		<prism:doi>10.3390/jimaging12020074</prism:doi>
	<prism:url>https://www.mdpi.com/2313-433X/12/2/74</prism:url>
	
	<cc:license rdf:resource="CC BY 4.0"/>
</item>
        <item rdf:about="https://www.mdpi.com/2313-433X/12/2/73">

	<title>J. Imaging, Vol. 12, Pages 73: Assessing Impact of Data Quality in Early Post-Operative Glioblastoma Segmentation</title>
	<link>https://www.mdpi.com/2313-433X/12/2/73</link>
	<description>Quantification of the residual tumor from early post-operative magnetic resonance imaging (MRI) is essential in follow-up and treatment planning for glioblastoma patients. Residual tumor segmentation from early post-operative MRI is particularly challenging compared to the closely related task of pre-operative segmentation, as the tumor lesions are small, fragmented, and easily confounded with noise in the resection cavity. Recently, several studies successfully trained deep learning models for early post-operative segmentation, yet with subpar performances compared to the analogous task pre-operatively. In this study, the impact of image and annotation quality on model training and performance in early post-operative glioblastoma segmentation was assessed. A dataset consisting of early post-operative MRI scans from 423 patients and two hospitals in Norway and Sweden was assembled, for which image and annotation qualities were evaluated by expert neurosurgeons. The Attention U-Net architecture was trained with five-fold cross-validation on different quality-based subsets of the dataset in order to evaluate the impact of training data quality on model performance. Including low-quality images in the training set did not deteriorate performance on high-quality images. However, models trained on exclusively high-quality images did not generalize to low-quality images. Models trained on exclusively high-quality annotations reached the same performance level as the models trained on the entire dataset, using only two-thirds of the dataset. Both image and annotation quality had a significant impact on model performance. In dataset curation, images should ideally be representative of the quality variations in the real-world clinical scenario, and efforts should be made to ensure exact ground truth annotations of high quality.</description>
	<pubDate>2026-02-10</pubDate>

	<content:encoded><![CDATA[
	<p><b>J. Imaging, Vol. 12, Pages 73: Assessing Impact of Data Quality in Early Post-Operative Glioblastoma Segmentation</b></p>
	<p>Journal of Imaging <a href="https://www.mdpi.com/2313-433X/12/2/73">doi: 10.3390/jimaging12020073</a></p>
	<p>Authors:
		Ragnhild Holden Helland
		David Bouget
		Asgeir Store Jakola
		Sébastien Muller
		Ole Solheim
		Ingerid Reinertsen
		</p>
	<p>Quantification of the residual tumor from early post-operative magnetic resonance imaging (MRI) is essential in follow-up and treatment planning for glioblastoma patients. Residual tumor segmentation from early post-operative MRI is particularly challenging compared to the closely related task of pre-operative segmentation, as the tumor lesions are small, fragmented, and easily confounded with noise in the resection cavity. Recently, several studies successfully trained deep learning models for early post-operative segmentation, yet with subpar performances compared to the analogous task pre-operatively. In this study, the impact of image and annotation quality on model training and performance in early post-operative glioblastoma segmentation was assessed. A dataset consisting of early post-operative MRI scans from 423 patients and two hospitals in Norway and Sweden was assembled, for which image and annotation qualities were evaluated by expert neurosurgeons. The Attention U-Net architecture was trained with five-fold cross-validation on different quality-based subsets of the dataset in order to evaluate the impact of training data quality on model performance. Including low-quality images in the training set did not deteriorate performance on high-quality images. However, models trained on exclusively high-quality images did not generalize to low-quality images. Models trained on exclusively high-quality annotations reached the same performance level as the models trained on the entire dataset, using only two-thirds of the dataset. Both image and annotation quality had a significant impact on model performance. In dataset curation, images should ideally be representative of the quality variations in the real-world clinical scenario, and efforts should be made to ensure exact ground truth annotations of high quality.</p>
	]]></content:encoded>

	<dc:title>Assessing Impact of Data Quality in Early Post-Operative Glioblastoma Segmentation</dc:title>
			<dc:creator>Ragnhild Holden Helland</dc:creator>
			<dc:creator>David Bouget</dc:creator>
			<dc:creator>Asgeir Store Jakola</dc:creator>
			<dc:creator>Sébastien Muller</dc:creator>
			<dc:creator>Ole Solheim</dc:creator>
			<dc:creator>Ingerid Reinertsen</dc:creator>
		<dc:identifier>doi: 10.3390/jimaging12020073</dc:identifier>
	<dc:source>Journal of Imaging</dc:source>
	<dc:date>2026-02-10</dc:date>

	<prism:publicationName>Journal of Imaging</prism:publicationName>
	<prism:publicationDate>2026-02-10</prism:publicationDate>
	<prism:volume>12</prism:volume>
	<prism:number>2</prism:number>
	<prism:section>Article</prism:section>
	<prism:startingPage>73</prism:startingPage>
		<prism:doi>10.3390/jimaging12020073</prism:doi>
	<prism:url>https://www.mdpi.com/2313-433X/12/2/73</prism:url>
	
	<cc:license rdf:resource="CC BY 4.0"/>
</item>
        <item rdf:about="https://www.mdpi.com/2313-433X/12/2/72">

	<title>J. Imaging, Vol. 12, Pages 72: GreenViT: A Vision Transformer with Single-Path Progressive Upsampling for Urban Green-Space Segmentation and Auditable Area Estimation</title>
	<link>https://www.mdpi.com/2313-433X/12/2/72</link>
	<description>Urban green-space monitoring in dense cityscapes remains limited by accuracy–efficiency trade-offs and the absence of integrated, auditable area estimation. We introduce GreenViT, a Vision Transformer (ViT) based framework for precise segmentation and transparent quantification of urban green space. GreenViT couples a ViT-L/14 backbone with a lightweight single-path, progressive upsampling decoder (Green Head), preserving global context while recovering thin structures. Experiments were conducted on a manually annotated dataset of 20 high-resolution satellite images collected from Satellites.Pro, covering five land-cover classes (background, green space, building, road, and water). Using a 224 × 224 sliding window sampling scheme, the 20 images yield 62,650 training/validation patches. Under five-fold evaluation, it attains 0.9200 ± 0.0243 mIoU, 0.9580 ± 0.0135 Dice, and 0.9570 PA, and the calibrated estimator achieves 1.10% relative area error. Overall, GreenViT strikes a strong balance between accuracy and efficiency, making it particularly well-suited for thin or boundary-rich classes. It can be used to support planning evaluations, green-space statistics, urban renewal assessments, and ecological red-line verification, while providing reliable green-area metrics to support urban heat mitigation and pollution control efforts. This makes it highly suitable for decision-oriented long-term monitoring and management assessments.</description>
	<pubDate>2026-02-10</pubDate>

	<content:encoded><![CDATA[
	<p><b>J. Imaging, Vol. 12, Pages 72: GreenViT: A Vision Transformer with Single-Path Progressive Upsampling for Urban Green-Space Segmentation and Auditable Area Estimation</b></p>
	<p>Journal of Imaging <a href="https://www.mdpi.com/2313-433X/12/2/72">doi: 10.3390/jimaging12020072</a></p>
	<p>Authors:
		Ziqiang Xu
		Young Choi
		Changyong Yi
		Chanjeong Park
		Jinyoung Park
		Hyungkeun Park
		Sujeen Song
		</p>
	<p>Urban green-space monitoring in dense cityscapes remains limited by accuracy–efficiency trade-offs and the absence of integrated, auditable area estimation. We introduce GreenViT, a Vision Transformer (ViT) based framework for precise segmentation and transparent quantification of urban green space. GreenViT couples a ViT-L/14 backbone with a lightweight single-path, progressive upsampling decoder (Green Head), preserving global context while recovering thin structures. Experiments were conducted on a manually annotated dataset of 20 high-resolution satellite images collected from Satellites.Pro, covering five land-cover classes (background, green space, building, road, and water). Using a 224 × 224 sliding window sampling scheme, the 20 images yield 62,650 training/validation patches. Under five-fold evaluation, it attains 0.9200 ± 0.0243 mIoU, 0.9580 ± 0.0135 Dice, and 0.9570 PA, and the calibrated estimator achieves 1.10% relative area error. Overall, GreenViT strikes a strong balance between accuracy and efficiency, making it particularly well-suited for thin or boundary-rich classes. It can be used to support planning evaluations, green-space statistics, urban renewal assessments, and ecological red-line verification, while providing reliable green-area metrics to support urban heat mitigation and pollution control efforts. This makes it highly suitable for decision-oriented long-term monitoring and management assessments.</p>
	]]></content:encoded>

	<dc:title>GreenViT: A Vision Transformer with Single-Path Progressive Upsampling for Urban Green-Space Segmentation and Auditable Area Estimation</dc:title>
			<dc:creator>Ziqiang Xu</dc:creator>
			<dc:creator>Young Choi</dc:creator>
			<dc:creator>Changyong Yi</dc:creator>
			<dc:creator>Chanjeong Park</dc:creator>
			<dc:creator>Jinyoung Park</dc:creator>
			<dc:creator>Hyungkeun Park</dc:creator>
			<dc:creator>Sujeen Song</dc:creator>
		<dc:identifier>doi: 10.3390/jimaging12020072</dc:identifier>
	<dc:source>Journal of Imaging</dc:source>
	<dc:date>2026-02-10</dc:date>

	<prism:publicationName>Journal of Imaging</prism:publicationName>
	<prism:publicationDate>2026-02-10</prism:publicationDate>
	<prism:volume>12</prism:volume>
	<prism:number>2</prism:number>
	<prism:section>Article</prism:section>
	<prism:startingPage>72</prism:startingPage>
		<prism:doi>10.3390/jimaging12020072</prism:doi>
	<prism:url>https://www.mdpi.com/2313-433X/12/2/72</prism:url>
	
	<cc:license rdf:resource="CC BY 4.0"/>
</item>
        <item rdf:about="https://www.mdpi.com/2313-433X/12/2/71">

	<title>J. Imaging, Vol. 12, Pages 71: Relationship Between Display Pixel Structure and Gloss Perception</title>
	<link>https://www.mdpi.com/2313-433X/12/2/71</link>
	<description>The demand for accurate representation of gloss perception, which significantly contributes to the impression and evaluation of objects, is increasing owing to recent advancements in display technology enabling high-definition visual reproduction. This study experimentally analyzes the influence of display pixel structure on gloss perception. In a visual evaluation experiment using natural images, gloss perception was assessed across six types of stimuli: three subpixel arrays (RGB, RGBW, and PenTile RGBG) combined with two pixel–aperture ratios (100% and 50%). The experimental results statistically confirmed that regardless of pixel–aperture ratio, the RGB subpixel array was perceived as exhibiting the strongest gloss. Furthermore, cluster analysis of observers revealed individual differences in the effect of pixel structure on gloss perception. Additionally, gloss classification and image feature analysis suggested that the magnitude of pixel structure influence varies depending on the frequency components contained in the images. Moreover, analysis using a generalized linear mixed model supported the superiority of the RGB subpixel array even when accounting for variability across observers and natural images.</description>
	<pubDate>2026-02-09</pubDate>

	<content:encoded><![CDATA[
	<p><b>J. Imaging, Vol. 12, Pages 71: Relationship Between Display Pixel Structure and Gloss Perception</b></p>
	<p>Journal of Imaging <a href="https://www.mdpi.com/2313-433X/12/2/71">doi: 10.3390/jimaging12020071</a></p>
	<p>Authors:
		Kosei Aketagawa
		Midori Tanaka
		Takahiko Horiuchi
		</p>
	<p>The demand for accurate representation of gloss perception, which significantly contributes to the impression and evaluation of objects, is increasing owing to recent advancements in display technology enabling high-definition visual reproduction. This study experimentally analyzes the influence of display pixel structure on gloss perception. In a visual evaluation experiment using natural images, gloss perception was assessed across six types of stimuli: three subpixel arrays (RGB, RGBW, and PenTile RGBG) combined with two pixel–aperture ratios (100% and 50%). The experimental results statistically confirmed that regardless of pixel–aperture ratio, the RGB subpixel array was perceived as exhibiting the strongest gloss. Furthermore, cluster analysis of observers revealed individual differences in the effect of pixel structure on gloss perception. Additionally, gloss classification and image feature analysis suggested that the magnitude of pixel structure influence varies depending on the frequency components contained in the images. Moreover, analysis using a generalized linear mixed model supported the superiority of the RGB subpixel array even when accounting for variability across observers and natural images.</p>
	]]></content:encoded>

	<dc:title>Relationship Between Display Pixel Structure and Gloss Perception</dc:title>
			<dc:creator>Kosei Aketagawa</dc:creator>
			<dc:creator>Midori Tanaka</dc:creator>
			<dc:creator>Takahiko Horiuchi</dc:creator>
		<dc:identifier>doi: 10.3390/jimaging12020071</dc:identifier>
	<dc:source>Journal of Imaging</dc:source>
	<dc:date>2026-02-09</dc:date>

	<prism:publicationName>Journal of Imaging</prism:publicationName>
	<prism:publicationDate>2026-02-09</prism:publicationDate>
	<prism:volume>12</prism:volume>
	<prism:number>2</prism:number>
	<prism:section>Article</prism:section>
	<prism:startingPage>71</prism:startingPage>
		<prism:doi>10.3390/jimaging12020071</prism:doi>
	<prism:url>https://www.mdpi.com/2313-433X/12/2/71</prism:url>
	
	<cc:license rdf:resource="CC BY 4.0"/>
</item>
        <item rdf:about="https://www.mdpi.com/2313-433X/12/2/70">

	<title>J. Imaging, Vol. 12, Pages 70: Topic-Modeling Guided Semantic Clustering for Enhancing CNN-Based Image Classification Using Scale-Invariant Feature Transform and Block Gabor Filtering</title>
	<link>https://www.mdpi.com/2313-433X/12/2/70</link>
	<description>This study proposes a topic-modeling guided framework that enhances image classification by introducing semantic clustering prior to CNN training. Images are processed through two key-point extraction pipelines: Scale-Invariant Feature Transform (SIFT) with Sobel edge detection and Block Gabor Filtering (BGF), to obtain local feature descriptors. These descriptors are clustered using K-means to build a visual vocabulary. Bag of Words histograms then represent each image as a visual document. Latent Dirichlet Allocation is applied to uncover latent semantic topics, generating coherent image clusters. Cluster-specific CNN models, including AlexNet, GoogLeNet, and several ResNet variants, are trained under identical conditions to identify the most suitable architecture for each cluster. Two topic guided integration strategies, the Maximum Proportion Topic (MPT) and the Weight Proportion Topic (WPT), are then used to assign test images to the corresponding specialized model. Experimental results show that both the SIFT-based and BGF-based pipelines outperform non-clustered CNN models and a baseline method using Incremental PCA, K-means, Same-Cluster Prediction, and unweighted Ensemble Voting. The SIFT pipeline achieves the highest accuracy of 95.24% with the MPT strategy, while the BGF pipeline achieves 93.76% with the WPT strategy. These findings confirm that semantic structure introduced through topic modeling substantially improves CNN classification performance.</description>
	<pubDate>2026-02-09</pubDate>

	<content:encoded><![CDATA[
	<p><b>J. Imaging, Vol. 12, Pages 70: Topic-Modeling Guided Semantic Clustering for Enhancing CNN-Based Image Classification Using Scale-Invariant Feature Transform and Block Gabor Filtering</b></p>
	<p>Journal of Imaging <a href="https://www.mdpi.com/2313-433X/12/2/70">doi: 10.3390/jimaging12020070</a></p>
	<p>Authors:
		Natthaphong Suthamno
		Jessada Tanthanuch
		</p>
	<p>This study proposes a topic-modeling guided framework that enhances image classification by introducing semantic clustering prior to CNN training. Images are processed through two key-point extraction pipelines: Scale-Invariant Feature Transform (SIFT) with Sobel edge detection and Block Gabor Filtering (BGF), to obtain local feature descriptors. These descriptors are clustered using K-means to build a visual vocabulary. Bag of Words histograms then represent each image as a visual document. Latent Dirichlet Allocation is applied to uncover latent semantic topics, generating coherent image clusters. Cluster-specific CNN models, including AlexNet, GoogLeNet, and several ResNet variants, are trained under identical conditions to identify the most suitable architecture for each cluster. Two topic guided integration strategies, the Maximum Proportion Topic (MPT) and the Weight Proportion Topic (WPT), are then used to assign test images to the corresponding specialized model. Experimental results show that both the SIFT-based and BGF-based pipelines outperform non-clustered CNN models and a baseline method using Incremental PCA, K-means, Same-Cluster Prediction, and unweighted Ensemble Voting. The SIFT pipeline achieves the highest accuracy of 95.24% with the MPT strategy, while the BGF pipeline achieves 93.76% with the WPT strategy. These findings confirm that semantic structure introduced through topic modeling substantially improves CNN classification performance.</p>
	]]></content:encoded>

	<dc:title>Topic-Modeling Guided Semantic Clustering for Enhancing CNN-Based Image Classification Using Scale-Invariant Feature Transform and Block Gabor Filtering</dc:title>
			<dc:creator>Natthaphong Suthamno</dc:creator>
			<dc:creator>Jessada Tanthanuch</dc:creator>
		<dc:identifier>doi: 10.3390/jimaging12020070</dc:identifier>
	<dc:source>Journal of Imaging</dc:source>
	<dc:date>2026-02-09</dc:date>

	<prism:publicationName>Journal of Imaging</prism:publicationName>
	<prism:publicationDate>2026-02-09</prism:publicationDate>
	<prism:volume>12</prism:volume>
	<prism:number>2</prism:number>
	<prism:section>Article</prism:section>
	<prism:startingPage>70</prism:startingPage>
		<prism:doi>10.3390/jimaging12020070</prism:doi>
	<prism:url>https://www.mdpi.com/2313-433X/12/2/70</prism:url>
	
	<cc:license rdf:resource="CC BY 4.0"/>
</item>
        <item rdf:about="https://www.mdpi.com/2313-433X/12/2/69">

	<title>J. Imaging, Vol. 12, Pages 69: YOLO11s-UAV: An Advanced Algorithm for Small Object Detection in UAV Aerial Imagery</title>
	<link>https://www.mdpi.com/2313-433X/12/2/69</link>
	<description>Unmanned aerial vehicles (UAVs) are now widely used in various applications, including agriculture, urban traffic management, and search and rescue operations. However, several challenges arise, including the small size of objects occupying only a sparse number of pixels in images, complex backgrounds in aerial footage, and limited computational resources onboard. To address these issues, this paper proposes an improved UAV-based small object detection algorithm, YOLO11s-UAV, specifically designed for aerial imagery. Firstly, we introduce a novel FPN, called Content-Aware Reassembly and Interaction Feature Pyramid Network (CARIFPN), which significantly enhances small object feature detection while reducing redundant network structures. Secondly, we apply a new downsampling convolution for small object feature extraction, called Space-to-Depth for Dilation-wise Residual Convolution (S2DResConv), in the model’s backbone. This module effectively eliminates information loss caused by strided convolution or pooling operations and facilitates the capture of multi-scale context. Finally, we integrate a simple, parameter-free attention module (SimAM) with C3k2 to form Flexible SimAM (FlexSimAM), which is applied throughout the entire model. This improved module not only reduces the model’s complexity but also enables efficient enhancement of small object features in complex scenarios. Experimental results demonstrate that on the VisDrone-DET2019 dataset, our model improves mAP@0.5 by 7.8% on the validation set (reaching 46.0%) and by 5.9% on the test set (increasing to 37.3%) compared to the baseline YOLO11s, while reducing model parameters by 55.3%. Similarly, it achieves a 7.2% improvement on the TinyPerson dataset and a 3.0% increase on UAVDT-DET. Deployment on the NVIDIA Jetson Orin NX SUPER platform shows that our model achieves 33 FPS, which is 21.4% lower than YOLO11s, confirming its feasibility for real-time onboard UAV applications.</description>
	<pubDate>2026-02-06</pubDate>

	<content:encoded><![CDATA[
	<p><b>J. Imaging, Vol. 12, Pages 69: YOLO11s-UAV: An Advanced Algorithm for Small Object Detection in UAV Aerial Imagery</b></p>
	<p>Journal of Imaging <a href="https://www.mdpi.com/2313-433X/12/2/69">doi: 10.3390/jimaging12020069</a></p>
	<p>Authors:
		Qi Mi
		Jianshu Chao
		Anqi Chen
		Kaiyuan Zhang
		Jiahua Lai
		</p>
	<p>Unmanned aerial vehicles (UAVs) are now widely used in various applications, including agriculture, urban traffic management, and search and rescue operations. However, several challenges arise, including the small size of objects occupying only a sparse number of pixels in images, complex backgrounds in aerial footage, and limited computational resources onboard. To address these issues, this paper proposes an improved UAV-based small object detection algorithm, YOLO11s-UAV, specifically designed for aerial imagery. Firstly, we introduce a novel FPN, called Content-Aware Reassembly and Interaction Feature Pyramid Network (CARIFPN), which significantly enhances small object feature detection while reducing redundant network structures. Secondly, we apply a new downsampling convolution for small object feature extraction, called Space-to-Depth for Dilation-wise Residual Convolution (S2DResConv), in the model’s backbone. This module effectively eliminates information loss caused by strided convolution or pooling operations and facilitates the capture of multi-scale context. Finally, we integrate a simple, parameter-free attention module (SimAM) with C3k2 to form Flexible SimAM (FlexSimAM), which is applied throughout the entire model. This improved module not only reduces the model’s complexity but also enables efficient enhancement of small object features in complex scenarios. Experimental results demonstrate that on the VisDrone-DET2019 dataset, our model improves mAP@0.5 by 7.8% on the validation set (reaching 46.0%) and by 5.9% on the test set (increasing to 37.3%) compared to the baseline YOLO11s, while reducing model parameters by 55.3%. Similarly, it achieves a 7.2% improvement on the TinyPerson dataset and a 3.0% increase on UAVDT-DET. Deployment on the NVIDIA Jetson Orin NX SUPER platform shows that our model achieves 33 FPS, which is 21.4% lower than YOLO11s, confirming its feasibility for real-time onboard UAV applications.</p>
	]]></content:encoded>

	<dc:title>YOLO11s-UAV: An Advanced Algorithm for Small Object Detection in UAV Aerial Imagery</dc:title>
			<dc:creator>Qi Mi</dc:creator>
			<dc:creator>Jianshu Chao</dc:creator>
			<dc:creator>Anqi Chen</dc:creator>
			<dc:creator>Kaiyuan Zhang</dc:creator>
			<dc:creator>Jiahua Lai</dc:creator>
		<dc:identifier>doi: 10.3390/jimaging12020069</dc:identifier>
	<dc:source>Journal of Imaging</dc:source>
	<dc:date>2026-02-06</dc:date>

	<prism:publicationName>Journal of Imaging</prism:publicationName>
	<prism:publicationDate>2026-02-06</prism:publicationDate>
	<prism:volume>12</prism:volume>
	<prism:number>2</prism:number>
	<prism:section>Article</prism:section>
	<prism:startingPage>69</prism:startingPage>
		<prism:doi>10.3390/jimaging12020069</prism:doi>
	<prism:url>https://www.mdpi.com/2313-433X/12/2/69</prism:url>
	
	<cc:license rdf:resource="CC BY 4.0"/>
</item>
        <item rdf:about="https://www.mdpi.com/2313-433X/12/2/68">

	<title>J. Imaging, Vol. 12, Pages 68: Automated Radiological Report Generation from Breast Ultrasound Images Using Vision and Language Transformers</title>
	<link>https://www.mdpi.com/2313-433X/12/2/68</link>
	<description>Breast ultrasound imaging is widely used for the detection and characterization of breast abnormalities; however, generating detailed and consistent radiological reports remains a labor-intensive and subjective process. Recent advances in deep learning have demonstrated the potential of automated report generation systems to support clinical workflows, yet most existing approaches focus on chest X-ray imaging and rely on convolutional–recurrent architectures with limited capacity to model long-range dependencies and complex clinical semantics. In this work, we propose a multimodal Transformer-based framework for automatic breast ultrasound report generation that integrates visual and textual information through cross-attention mechanisms. The proposed architecture employs a Vision Transformer (ViT) to extract rich spatial and morphological features from ultrasound images. For textual embedding, pretrained language models (BERT, BioBERT, and GPT-2) are implemented in various encoder–decoder configurations to leverage both general linguistic knowledge and domain-specific biomedical semantics. A multimodal Transformer decoder is implemented to autoregressively generate diagnostic reports by jointly attending to visual features and contextualized textual embeddings. We conducted an extensive quantitative evaluation using standard report generation metrics, including BLEU, ROUGE-L, METEOR, and CIDEr, to assess lexical accuracy, semantic alignment, and clinical relevance. Experimental results demonstrate that BioBERT-based models consistently outperform general domain counterparts in clinical specificity, while GPT-2-based decoders improve linguistic fluency.</description>
	<pubDate>2026-02-06</pubDate>

	<content:encoded><![CDATA[
	<p><b>J. Imaging, Vol. 12, Pages 68: Automated Radiological Report Generation from Breast Ultrasound Images Using Vision and Language Transformers</b></p>
	<p>Journal of Imaging <a href="https://www.mdpi.com/2313-433X/12/2/68">doi: 10.3390/jimaging12020068</a></p>
	<p>Authors:
		Shaheen Khatoon
		Azhar Mahmood
		</p>
	<p>Breast ultrasound imaging is widely used for the detection and characterization of breast abnormalities; however, generating detailed and consistent radiological reports remains a labor-intensive and subjective process. Recent advances in deep learning have demonstrated the potential of automated report generation systems to support clinical workflows, yet most existing approaches focus on chest X-ray imaging and rely on convolutional–recurrent architectures with limited capacity to model long-range dependencies and complex clinical semantics. In this work, we propose a multimodal Transformer-based framework for automatic breast ultrasound report generation that integrates visual and textual information through cross-attention mechanisms. The proposed architecture employs a Vision Transformer (ViT) to extract rich spatial and morphological features from ultrasound images. For textual embedding, pretrained language models (BERT, BioBERT, and GPT-2) are implemented in various encoder–decoder configurations to leverage both general linguistic knowledge and domain-specific biomedical semantics. A multimodal Transformer decoder is implemented to autoregressively generate diagnostic reports by jointly attending to visual features and contextualized textual embeddings. We conducted an extensive quantitative evaluation using standard report generation metrics, including BLEU, ROUGE-L, METEOR, and CIDEr, to assess lexical accuracy, semantic alignment, and clinical relevance. Experimental results demonstrate that BioBERT-based models consistently outperform general domain counterparts in clinical specificity, while GPT-2-based decoders improve linguistic fluency.</p>
	]]></content:encoded>

	<dc:title>Automated Radiological Report Generation from Breast Ultrasound Images Using Vision and Language Transformers</dc:title>
			<dc:creator>Shaheen Khatoon</dc:creator>
			<dc:creator>Azhar Mahmood</dc:creator>
		<dc:identifier>doi: 10.3390/jimaging12020068</dc:identifier>
	<dc:source>Journal of Imaging</dc:source>
	<dc:date>2026-02-06</dc:date>

	<prism:publicationName>Journal of Imaging</prism:publicationName>
	<prism:publicationDate>2026-02-06</prism:publicationDate>
	<prism:volume>12</prism:volume>
	<prism:number>2</prism:number>
	<prism:section>Article</prism:section>
	<prism:startingPage>68</prism:startingPage>
		<prism:doi>10.3390/jimaging12020068</prism:doi>
	<prism:url>https://www.mdpi.com/2313-433X/12/2/68</prism:url>
	
	<cc:license rdf:resource="CC BY 4.0"/>
</item>
        <item rdf:about="https://www.mdpi.com/2313-433X/12/2/67">

	<title>J. Imaging, Vol. 12, Pages 67: Predicting Nutritional and Morphological Attributes of Fresh Commercial Opuntia Cladodes Using Machine Learning and Imaging</title>
	<link>https://www.mdpi.com/2313-433X/12/2/67</link>
	<description>Opuntia ficus-indica L. is a prominent crop in Mexico, requiring advanced non-destructive technologies for the real-time monitoring and quality control of fresh commercial cladodes. The primary research objective of this study was to develop and validate high-precision mathematical models that correlate hyperspectral signatures (400&amp;amp;ndash;1000 nm) with the specific nutritional, morphological, and antioxidant attributes of fresh cladodes (cultivar Villanueva) at their peak commercial maturity. By combining hyperspectral imaging (HSI) with machine learning algorithms, including K-Means clustering for image preprocessing and Partial Least Squares Regression (PLSR) for predictive modeling, this study successfully predicted the concentrations of 10 minerals (N, P, K, Ca, Mg, Fe, B, Mn, Zn, and Cu), chlorophylls (a, b, and Total), and antioxidant capacities (ABTS, FRAP, and DPPH). The innovative nature of this work lies in the simultaneous non-destructive quantification of 17 distinct variables from a single scan, achieving coefficients of determination (R2) as high as 0.988 for Phosphorus and Chlorophyll b. The practical applicability of this research provides a viable replacement for time-consuming and destructive laboratory acid digestion, enabling producers to implement automated, high-throughput sorting lines for quality assurance. Furthermore, this study establishes a framework for interdisciplinary collaborations between agricultural engineers, data scientists for algorithm optimization, and food scientists to enhance the functional value chain of Opuntia products.</description>
	<pubDate>2026-02-05</pubDate>

	<content:encoded><![CDATA[
	<p><b>J. Imaging, Vol. 12, Pages 67: Predicting Nutritional and Morphological Attributes of Fresh Commercial Opuntia Cladodes Using Machine Learning and Imaging</b></p>
	<p>Journal of Imaging <a href="https://www.mdpi.com/2313-433X/12/2/67">doi: 10.3390/jimaging12020067</a></p>
	<p>Authors:
		Juan Arredondo Valdez
		Josué Israel García López
		Héctor Flores Breceda
		Ajay Kumar
		Ricardo David Valdez Cepeda
		Alejandro Isabel Luna Maldonado
		</p>
	<p>Opuntia ficus-indica L. is a prominent crop in Mexico, requiring advanced non-destructive technologies for the real-time monitoring and quality control of fresh commercial cladodes. The primary research objective of this study was to develop and validate high-precision mathematical models that correlate hyperspectral signatures (400&amp;amp;ndash;1000 nm) with the specific nutritional, morphological, and antioxidant attributes of fresh cladodes (cultivar Villanueva) at their peak commercial maturity. By combining hyperspectral imaging (HSI) with machine learning algorithms, including K-Means clustering for image preprocessing and Partial Least Squares Regression (PLSR) for predictive modeling, this study successfully predicted the concentrations of 10 minerals (N, P, K, Ca, Mg, Fe, B, Mn, Zn, and Cu), chlorophylls (a, b, and Total), and antioxidant capacities (ABTS, FRAP, and DPPH). The innovative nature of this work lies in the simultaneous non-destructive quantification of 17 distinct variables from a single scan, achieving coefficients of determination (R2) as high as 0.988 for Phosphorus and Chlorophyll b. The practical applicability of this research provides a viable replacement for time-consuming and destructive laboratory acid digestion, enabling producers to implement automated, high-throughput sorting lines for quality assurance. Furthermore, this study establishes a framework for interdisciplinary collaborations between agricultural engineers, data scientists for algorithm optimization, and food scientists to enhance the functional value chain of Opuntia products.</p>
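	<p>The regression step described above can be sketched in a few lines with scikit-learn's PLSRegression; the snippet below uses synthetic spectra and a proxy target purely for illustration, so the band count, component number, and data are assumptions rather than the study's pipeline.</p>
	<pre><code>
# Minimal PLSR sketch: fit hyperspectral signatures to a single quality attribute and
# report held-out R2 (synthetic stand-in data; not the study's spectra or settings).
import numpy as np
from sklearn.cross_decomposition import PLSRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score

rng = np.random.default_rng(0)
n_samples, n_bands = 120, 204                         # e.g., reflectance over 400-1000 nm
X = rng.random((n_samples, n_bands))                  # one spectrum per cladode
y = X[:, 40:60].mean(axis=1) + 0.05 * rng.standard_normal(n_samples)   # proxy attribute

X_tr, X_te, y_tr, y_te = train_test_split(X, y, test_size=0.3, random_state=0)
pls = PLSRegression(n_components=10)                  # number of latent variables is a tuning choice
pls.fit(X_tr, y_tr)
print("held-out R2:", round(r2_score(y_te, pls.predict(X_te).ravel()), 3))
</code></pre>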
	]]></content:encoded>

	<dc:title>Predicting Nutritional and Morphological Attributes of Fresh Commercial Opuntia Cladodes Using Machine Learning and Imaging</dc:title>
			<dc:creator>Juan Arredondo Valdez</dc:creator>
			<dc:creator>Josué Israel García López</dc:creator>
			<dc:creator>Héctor Flores Breceda</dc:creator>
			<dc:creator>Ajay Kumar</dc:creator>
			<dc:creator>Ricardo David Valdez Cepeda</dc:creator>
			<dc:creator>Alejandro Isabel Luna Maldonado</dc:creator>
		<dc:identifier>doi: 10.3390/jimaging12020067</dc:identifier>
	<dc:source>Journal of Imaging</dc:source>
	<dc:date>2026-02-05</dc:date>

	<prism:publicationName>Journal of Imaging</prism:publicationName>
	<prism:publicationDate>2026-02-05</prism:publicationDate>
	<prism:volume>12</prism:volume>
	<prism:number>2</prism:number>
	<prism:section>Article</prism:section>
	<prism:startingPage>67</prism:startingPage>
		<prism:doi>10.3390/jimaging12020067</prism:doi>
	<prism:url>https://www.mdpi.com/2313-433X/12/2/67</prism:url>
	
	<cc:license rdf:resource="CC BY 4.0"/>
</item>
        <item rdf:about="https://www.mdpi.com/2313-433X/12/2/66">

	<title>J. Imaging, Vol. 12, Pages 66: A Survey of Crop Disease Recognition Methods Based on Spectral and RGB Images</title>
	<link>https://www.mdpi.com/2313-433X/12/2/66</link>
	<description>Major crops worldwide are affected by various diseases yearly, leading to crop losses in different regions. The primary methods for addressing crop disease losses include manual inspection and chemical control. However, traditional manual inspection methods are time-consuming, labor-intensive, and require specialized knowledge. The preemptive use of chemicals also poses a risk of soil pollution, which may cause irreversible damage. With the advancement of computer hardware, photographic technology, and artificial intelligence, crop disease recognition methods based on spectral and red&amp;amp;ndash;green&amp;amp;ndash;blue (RGB) images not only recognize diseases without damaging the crops but also offer high accuracy and speed of recognition, essentially solving the problems associated with manual inspection and chemical control. This paper summarizes the research on disease recognition methods based on spectral and RGB images, with the literature spanning from 2020 through early 2025. Unlike previous surveys, this paper reviews recent advances involving emerging paradigms such as State Space Models (e.g., Mamba) and Generative AI in the context of crop disease recognition. In addition, it introduces public datasets and commonly used evaluation metrics for crop disease identification. Finally, the paper discusses potential issues and solutions encountered during research, including the use of diffusion models for data augmentation. Hopefully, this survey will help readers understand the current methods and effectiveness of crop disease detection, inspiring the development of more effective methods to assist farmers in identifying crop diseases.</description>
	<pubDate>2026-02-05</pubDate>

	<content:encoded><![CDATA[
	<p><b>J. Imaging, Vol. 12, Pages 66: A Survey of Crop Disease Recognition Methods Based on Spectral and RGB Images</b></p>
	<p>Journal of Imaging <a href="https://www.mdpi.com/2313-433X/12/2/66">doi: 10.3390/jimaging12020066</a></p>
	<p>Authors:
		Haoze Zheng
		Heran Wang
		Hualong Dong
		Yurong Qian
		</p>
	<p>Major crops worldwide are affected by various diseases yearly, leading to crop losses in different regions. The primary methods for addressing crop disease losses include manual inspection and chemical control. However, traditional manual inspection methods are time-consuming, labor-intensive, and require specialized knowledge. The preemptive use of chemicals also poses a risk of soil pollution, which may cause irreversible damage. With the advancement of computer hardware, photographic technology, and artificial intelligence, crop disease recognition methods based on spectral and red&amp;amp;ndash;green&amp;amp;ndash;blue (RGB) images not only recognize diseases without damaging the crops but also offer high accuracy and speed of recognition, essentially solving the problems associated with manual inspection and chemical control. This paper summarizes the research on disease recognition methods based on spectral and RGB images, with the literature spanning from 2020 through early 2025. Unlike previous surveys, this paper reviews recent advances involving emerging paradigms such as State Space Models (e.g., Mamba) and Generative AI in the context of crop disease recognition. In addition, it introduces public datasets and commonly used evaluation metrics for crop disease identification. Finally, the paper discusses potential issues and solutions encountered during research, including the use of diffusion models for data augmentation. Hopefully, this survey will help readers understand the current methods and effectiveness of crop disease detection, inspiring the development of more effective methods to assist farmers in identifying crop diseases.</p>
	]]></content:encoded>

	<dc:title>A Survey of Crop Disease Recognition Methods Based on Spectral and RGB Images</dc:title>
			<dc:creator>Haoze Zheng</dc:creator>
			<dc:creator>Heran Wang</dc:creator>
			<dc:creator>Hualong Dong</dc:creator>
			<dc:creator>Yurong Qian</dc:creator>
		<dc:identifier>doi: 10.3390/jimaging12020066</dc:identifier>
	<dc:source>Journal of Imaging</dc:source>
	<dc:date>2026-02-05</dc:date>

	<prism:publicationName>Journal of Imaging</prism:publicationName>
	<prism:publicationDate>2026-02-05</prism:publicationDate>
	<prism:volume>12</prism:volume>
	<prism:number>2</prism:number>
	<prism:section>Review</prism:section>
	<prism:startingPage>66</prism:startingPage>
		<prism:doi>10.3390/jimaging12020066</prism:doi>
	<prism:url>https://www.mdpi.com/2313-433X/12/2/66</prism:url>
	
	<cc:license rdf:resource="CC BY 4.0"/>
</item>
        <item rdf:about="https://www.mdpi.com/2313-433X/12/2/65">

	<title>J. Imaging, Vol. 12, Pages 65: Ciphertext-Only Attack on Grayscale-Based EtC Image Encryption via Component Separation and Regularized Single-Channel Compatibility</title>
	<link>https://www.mdpi.com/2313-433X/12/2/65</link>
	<description>Grayscale-based Encryption-then-Compression (EtC) systems transform RGB images into the YCbCr color space, concatenate the components into a single grayscale image, and apply block permutation, block rotation/flipping, and block-wise negative&amp;amp;ndash;positive inversion. Because this pipeline separates color components and disrupts inter-channel statistics, existing extended jigsaw puzzle solvers (JPSs) have been regarded as ineffective, and grayscale-based EtC systems have been considered resistant to ciphertext-only visual reconstruction. In this paper, we present a practical ciphertext-only attack against grayscale-based EtC. The proposed attack introduces three key components: (i) Texture-Based Component Classification (TBCC) to distinguish luminance (Y) and chrominance (Cb/Cr) blocks and focus reconstruction on structure-rich regions; (ii) Regularized Single-Channel Edge Compatibility (R-SCEC), which applies Tikhonov regularization to a single-channel variant of the Mahalanobis Gradient Compatibility (MGC) measure to alleviate covariance rank-deficiency while maintaining robustness under inversion and geometric transforms; and (iii) Adaptive Pruning based on the TBCC-reduced search space that skips redundant boundary matching computations to further improve reconstruction efficiency. Experiments show that, in settings where existing extended JPS solvers fail, our method can still recover visually recognizable semantic content, revealing a potential vulnerability in grayscale-based EtC and calling for a re-evaluation of its security.</description>
	<pubDate>2026-02-05</pubDate>

	<content:encoded><![CDATA[
	<p><b>J. Imaging, Vol. 12, Pages 65: Ciphertext-Only Attack on Grayscale-Based EtC Image Encryption via Component Separation and Regularized Single-Channel Compatibility</b></p>
	<p>Journal of Imaging <a href="https://www.mdpi.com/2313-433X/12/2/65">doi: 10.3390/jimaging12020065</a></p>
	<p>Authors:
		Ruifeng Li
		Masaaki Fujiyoshi
		</p>
	<p>Grayscale-based Encryption-then-Compression (EtC) systems transform RGB images into the YCbCr color space, concatenate the components into a single grayscale image, and apply block permutation, block rotation/flipping, and block-wise negative&amp;amp;ndash;positive inversion. Because this pipeline separates color components and disrupts inter-channel statistics, existing extended jigsaw puzzle solvers (JPSs) have been regarded as ineffective, and grayscale-based EtC systems have been considered resistant to ciphertext-only visual reconstruction. In this paper, we present a practical ciphertext-only attack against grayscale-based EtC. The proposed attack introduces three key components: (i) Texture-Based Component Classification (TBCC) to distinguish luminance (Y) and chrominance (Cb/Cr) blocks and focus reconstruction on structure-rich regions; (ii) Regularized Single-Channel Edge Compatibility (R-SCEC), which applies Tikhonov regularization to a single-channel variant of the Mahalanobis Gradient Compatibility (MGC) measure to alleviate covariance rank-deficiency while maintaining robustness under inversion and geometric transforms; and (iii) Adaptive Pruning based on the TBCC-reduced search space that skips redundant boundary matching computations to further improve reconstruction efficiency. Experiments show that, in settings where existing extended JPS solvers fail, our method can still recover visually recognizable semantic content, revealing a potential vulnerability in grayscale-based EtC and calling for a re-evaluation of its security.</p>
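	<p>To make the compatibility idea concrete, the sketch below computes a Tikhonov-regularized, single-channel Mahalanobis-style cost for joining two grayscale blocks along a seam. The block size, the regularization constant, and the exact boundary statistics are assumptions chosen for illustration, not the paper's R-SCEC formulation.</p>
	<pre><code>
# Minimal sketch of a regularized single-channel edge-compatibility cost between two
# grayscale blocks (illustrative assumptions throughout; lower cost = better match).
import numpy as np

def edge_cost(block_a, block_b, eps=1e-2):
    """Cost of placing block_b immediately to the right of block_a."""
    grads_a = block_a[:, -1].astype(float) - block_a[:, -2].astype(float)  # boundary gradients
    mu = grads_a.mean()
    var = grads_a.var() + eps          # Tikhonov-style term avoids a degenerate variance
    seam = block_b[:, 0].astype(float) - block_a[:, -1].astype(float)      # gradient across the seam
    # One-dimensional Mahalanobis distance of the seam gradients under the boundary model.
    return float(np.sum((seam - mu) ** 2 / var))

rng = np.random.default_rng(1)
a = rng.integers(0, 256, (32, 32))
smooth = np.clip(a[:, -1:] + rng.integers(-3, 4, (32, 32)), 0, 255)   # plausible continuation
noisy = rng.integers(0, 256, (32, 32))                                # unrelated block
print("cost (plausible neighbour):", round(edge_cost(a, smooth), 1))
print("cost (random block):      ", round(edge_cost(a, noisy), 1))
</code></pre>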
	]]></content:encoded>

	<dc:title>Ciphertext-Only Attack on Grayscale-Based EtC Image Encryption via Component Separation and Regularized Single-Channel Compatibility</dc:title>
			<dc:creator>Ruifeng Li</dc:creator>
			<dc:creator>Masaaki Fujiyoshi</dc:creator>
		<dc:identifier>doi: 10.3390/jimaging12020065</dc:identifier>
	<dc:source>Journal of Imaging</dc:source>
	<dc:date>2026-02-05</dc:date>

	<prism:publicationName>Journal of Imaging</prism:publicationName>
	<prism:publicationDate>2026-02-05</prism:publicationDate>
	<prism:volume>12</prism:volume>
	<prism:number>2</prism:number>
	<prism:section>Article</prism:section>
	<prism:startingPage>65</prism:startingPage>
		<prism:doi>10.3390/jimaging12020065</prism:doi>
	<prism:url>https://www.mdpi.com/2313-433X/12/2/65</prism:url>
	
	<cc:license rdf:resource="CC BY 4.0"/>
</item>
        <item rdf:about="https://www.mdpi.com/2313-433X/12/2/64">

	<title>J. Imaging, Vol. 12, Pages 64: SIFT-SNN for Traffic-Flow Infrastructure Safety: A Real-Time Context-Aware Anomaly Detection Framework</title>
	<link>https://www.mdpi.com/2313-433X/12/2/64</link>
	<description>Automated anomaly detection in transportation infrastructure is essential for enhancing safety and reducing the operational costs associated with manual inspection protocols. This study presents an improved neuromorphic vision system, which extends the prior SIFT-SNN (scale-invariant feature transform&amp;amp;ndash;spiking neural network) proof-of-concept by incorporating temporal feature aggregation for context-aware and sequence-stable detection. Analysis of classical stitching-based pipelines exposed sensitivity to motion and lighting variations, motivating the proposed temporally smoothed neuromorphic design. SIFT keypoints are encoded into latency-based spike trains and classified using a leaky integrate-and-fire (LIF) spiking neural network implemented in PyTorch. Evaluated across three hardware configurations&amp;amp;mdash;an NVIDIA RTX 4060 GPU, an Intel i7 CPU, and a simulated Jetson Nano&amp;amp;mdash;the system achieved 92.3% accuracy and a macro F1 score of 91.0% under five-fold cross-validation. Inference latencies were measured at 9.5 ms, 26.1 ms, and ~48.3 ms per frame, respectively. Memory footprints were under 290 MB, and power consumption was estimated to be between 5 and 65 W. The classifier distinguishes between safe, partially dislodged, and fully dislodged barrier pins, which are critical failure modes for the Auckland Harbour Bridge&amp;amp;rsquo;s Movable Concrete Barrier (MCB) system. Temporal smoothing further improves recall for ambiguous cases. By achieving a compact model size (2.9 MB), low-latency inference, and minimal power demands, the proposed framework offers a deployable, interpretable, and energy-efficient alternative to conventional CNN-based inspection tools. Future work will focus on exploring the generalisability and transferability of the work presented, additional input sources, and human&amp;amp;ndash;computer interaction paradigms for various deployment infrastructures and advancements.</description>
	<pubDate>2026-01-31</pubDate>

	<content:encoded><![CDATA[
	<p><b>J. Imaging, Vol. 12, Pages 64: SIFT-SNN for Traffic-Flow Infrastructure Safety: A Real-Time Context-Aware Anomaly Detection Framework</b></p>
	<p>Journal of Imaging <a href="https://www.mdpi.com/2313-433X/12/2/64">doi: 10.3390/jimaging12020064</a></p>
	<p>Authors:
		Munish Rathee
		Boris Bačić
		Maryam Doborjeh
		</p>
	<p>Automated anomaly detection in transportation infrastructure is essential for enhancing safety and reducing the operational costs associated with manual inspection protocols. This study presents an improved neuromorphic vision system, which extends the prior SIFT-SNN (scale-invariant feature transform&amp;amp;ndash;spiking neural network) proof-of-concept by incorporating temporal feature aggregation for context-aware and sequence-stable detection. Analysis of classical stitching-based pipelines exposed sensitivity to motion and lighting variations, motivating the proposed temporally smoothed neuromorphic design. SIFT keypoints are encoded into latency-based spike trains and classified using a leaky integrate-and-fire (LIF) spiking neural network implemented in PyTorch. Evaluated across three hardware configurations&amp;amp;mdash;an NVIDIA RTX 4060 GPU, an Intel i7 CPU, and a simulated Jetson Nano&amp;amp;mdash;the system achieved 92.3% accuracy and a macro F1 score of 91.0% under five-fold cross-validation. Inference latencies were measured at 9.5 ms, 26.1 ms, and ~48.3 ms per frame, respectively. Memory footprints were under 290 MB, and power consumption was estimated to be between 5 and 65 W. The classifier distinguishes between safe, partially dislodged, and fully dislodged barrier pins, which are critical failure modes for the Auckland Harbour Bridge&amp;amp;rsquo;s Movable Concrete Barrier (MCB) system. Temporal smoothing further improves recall for ambiguous cases. By achieving a compact model size (2.9 MB), low-latency inference, and minimal power demands, the proposed framework offers a deployable, interpretable, and energy-efficient alternative to conventional CNN-based inspection tools. Future work will focus on exploring the generalisability and transferability of the work presented, additional input sources, and human&amp;amp;ndash;computer interaction paradigms for various deployment infrastructures and advancements.</p>
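	<p>The encoding and neuron model described above can be illustrated in a few lines of NumPy: keypoint response strengths are mapped to spike latencies (stronger responses fire earlier) and fed to a single leaky integrate-and-fire unit. The time constants, weights, and toy response values below are assumptions, not the trained system's parameters.</p>
	<pre><code>
# Minimal sketch: latency coding of keypoint strengths plus one LIF neuron
# (all constants are illustrative assumptions).
import numpy as np

def to_latency(responses, t_max=20.0):
    """Stronger keypoint responses fire earlier (latency coding)."""
    r = np.asarray(responses, dtype=float)
    r = (r - r.min()) / (r.max() - r.min() + 1e-9)
    return t_max * (1.0 - r)                       # strongest response -> latency near 0 ms

def lif_first_spike(spike_times, weight=0.5, tau=10.0, v_th=1.0, dt=0.5, t_end=25.0):
    """First firing time of a single LIF neuron driven by the input spike train."""
    v, t = 0.0, 0.0
    while t < t_end:
        v += (-v / tau) * dt                                        # membrane leak
        v += weight * np.sum((spike_times >= t) & (spike_times < t + dt))  # arriving spikes
        if v >= v_th:
            return t
        t += dt
    return None                                                     # never reached threshold

keypoint_strengths = [0.9, 0.7, 0.65, 0.2, 0.1]     # stand-in SIFT response magnitudes
spikes = to_latency(keypoint_strengths)
print("spike times (ms):", np.round(spikes, 1))
print("LIF first fires at (ms):", lif_first_spike(spikes))
</code></pre>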
	]]></content:encoded>

	<dc:title>SIFT-SNN for Traffic-Flow Infrastructure Safety: A Real-Time Context-Aware Anomaly Detection Framework</dc:title>
			<dc:creator>Munish Rathee</dc:creator>
			<dc:creator>Boris Bačić</dc:creator>
			<dc:creator>Maryam Doborjeh</dc:creator>
		<dc:identifier>doi: 10.3390/jimaging12020064</dc:identifier>
	<dc:source>Journal of Imaging</dc:source>
	<dc:date>2026-01-31</dc:date>

	<prism:publicationName>Journal of Imaging</prism:publicationName>
	<prism:publicationDate>2026-01-31</prism:publicationDate>
	<prism:volume>12</prism:volume>
	<prism:number>2</prism:number>
	<prism:section>Article</prism:section>
	<prism:startingPage>64</prism:startingPage>
		<prism:doi>10.3390/jimaging12020064</prism:doi>
	<prism:url>https://www.mdpi.com/2313-433X/12/2/64</prism:url>
	
	<cc:license rdf:resource="CC BY 4.0"/>
</item>
        <item rdf:about="https://www.mdpi.com/2313-433X/12/2/63">

	<title>J. Imaging, Vol. 12, Pages 63: A Cross-Domain Benchmark of Intrinsic and Post Hoc Explainability for 3D Deep Learning Models</title>
	<link>https://www.mdpi.com/2313-433X/12/2/63</link>
	<description>Deep learning models for three-dimensional (3D) data are increasingly used in domains such as medical imaging, object recognition, and robotics. Because these models are largely black boxes, the need for explainability has grown significantly. However, the lack of standardized and quantitative benchmarks for explainable artificial intelligence (XAI) in 3D data limits the reliable comparison of explanation quality. In this paper, we present a unified benchmarking framework to evaluate both intrinsic and post hoc XAI methods across three representative 3D datasets: volumetric CT scans (MosMed), voxelized CAD models (ModelNet40), and real-world point clouds (ScanObjectNN). The evaluated methods include Grad-CAM, Integrated Gradients, Saliency, Occlusion, and the intrinsic ResAttNet-3D model. We quantitatively assess explanations using the Correctness (AOPC), Completeness (AUPC), and Compactness metrics, consistently applied across all datasets. Our results show that explanation quality varies significantly across methods and domains, with Grad-CAM and intrinsic attention performing best on medical CT scans, while gradient-based methods excelled on voxelized and point-based data. Statistical tests (Kruskal&amp;amp;ndash;Wallis and Mann&amp;amp;ndash;Whitney U) confirmed significant performance differences between methods. No single approach achieved superior results across all domains, highlighting the importance of multi-metric evaluation. This work provides a reproducible framework for standardized assessment of 3D explainability and comparative insights to guide future XAI method selection.</description>
	<pubDate>2026-01-30</pubDate>

	<content:encoded><![CDATA[
	<p><b>J. Imaging, Vol. 12, Pages 63: A Cross-Domain Benchmark of Intrinsic and Post Hoc Explainability for 3D Deep Learning Models</b></p>
	<p>Journal of Imaging <a href="https://www.mdpi.com/2313-433X/12/2/63">doi: 10.3390/jimaging12020063</a></p>
	<p>Authors:
		Asmita Chakraborty
		Gizem Karagoz
		Nirvana Meratnia
		</p>
	<p>Deep learning models for three-dimensional (3D) data are increasingly used in domains such as medical imaging, object recognition, and robotics. Because these models are largely black boxes, the need for explainability has grown significantly. However, the lack of standardized and quantitative benchmarks for explainable artificial intelligence (XAI) in 3D data limits the reliable comparison of explanation quality. In this paper, we present a unified benchmarking framework to evaluate both intrinsic and post hoc XAI methods across three representative 3D datasets: volumetric CT scans (MosMed), voxelized CAD models (ModelNet40), and real-world point clouds (ScanObjectNN). The evaluated methods include Grad-CAM, Integrated Gradients, Saliency, Occlusion, and the intrinsic ResAttNet-3D model. We quantitatively assess explanations using the Correctness (AOPC), Completeness (AUPC), and Compactness metrics, consistently applied across all datasets. Our results show that explanation quality varies significantly across methods and domains, with Grad-CAM and intrinsic attention performing best on medical CT scans, while gradient-based methods excelled on voxelized and point-based data. Statistical tests (Kruskal&amp;amp;ndash;Wallis and Mann&amp;amp;ndash;Whitney U) confirmed significant performance differences between methods. No single approach achieved superior results across all domains, highlighting the importance of multi-metric evaluation. This work provides a reproducible framework for standardized assessment of 3D explainability and comparative insights to guide future XAI method selection.</p>
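	<p>Of the three metrics, Correctness (AOPC) is the most mechanical to describe: occlude the highest-attributed inputs first and average the resulting drop in the model's score. The sketch below implements that loop on a toy volume and a toy scoring function, both of which are stand-ins rather than the benchmark's models or data.</p>
	<pre><code>
# Minimal AOPC sketch: average score drop when the most-attributed voxels are occluded first
# (toy model and attribution maps; illustrative only).
import numpy as np

def aopc(model, volume, attribution, steps=10, frac=0.02):
    base = model(volume)
    x = volume.copy()
    order = np.argsort(attribution.ravel())[::-1]          # most relevant voxels first
    per_step = max(1, int(frac * x.size))
    drops = []
    for k in range(steps):
        idx = order[k * per_step:(k + 1) * per_step]
        x.ravel()[idx] = 0.0                               # occlude this batch
        drops.append(base - model(x))
    return float(np.mean(drops))

rng = np.random.default_rng(0)
vol = rng.random((16, 16, 16))

def model(v):
    return float(v[4:8, 4:8, 4:8].mean())                  # toy score tied to one region

faithful = np.zeros_like(vol)
faithful[4:8, 4:8, 4:8] = 1.0
random_map = rng.random(vol.shape)
print("AOPC, faithful attribution:", round(aopc(model, vol, faithful), 3))
print("AOPC, random attribution:  ", round(aopc(model, vol, random_map), 3))
</code></pre>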
	]]></content:encoded>

	<dc:title>A Cross-Domain Benchmark of Intrinsic and Post Hoc Explainability for 3D Deep Learning Models</dc:title>
			<dc:creator>Asmita Chakraborty</dc:creator>
			<dc:creator>Gizem Karagoz</dc:creator>
			<dc:creator>Nirvana Meratnia</dc:creator>
		<dc:identifier>doi: 10.3390/jimaging12020063</dc:identifier>
	<dc:source>Journal of Imaging</dc:source>
	<dc:date>2026-01-30</dc:date>

	<prism:publicationName>Journal of Imaging</prism:publicationName>
	<prism:publicationDate>2026-01-30</prism:publicationDate>
	<prism:volume>12</prism:volume>
	<prism:number>2</prism:number>
	<prism:section>Article</prism:section>
	<prism:startingPage>63</prism:startingPage>
		<prism:doi>10.3390/jimaging12020063</prism:doi>
	<prism:url>https://www.mdpi.com/2313-433X/12/2/63</prism:url>
	
	<cc:license rdf:resource="CC BY 4.0"/>
</item>
        <item rdf:about="https://www.mdpi.com/2313-433X/12/2/62">

	<title>J. Imaging, Vol. 12, Pages 62: AACNN-ViT: Adaptive Attention-Augmented Convolutional and Vision Transformer Fusion for Lung Cancer Detection</title>
	<link>https://www.mdpi.com/2313-433X/12/2/62</link>
	<description>Lung cancer remains a leading cause of cancer-related mortality. Although reliable multiclass classification of lung lesions from CT imaging is essential for early diagnosis, it remains challenging due to subtle inter-class differences, limited sample sizes, and class imbalance. We propose an Adaptive Attention-Augmented Convolutional Neural Network with Vision Transformer (AACNN-ViT), a hybrid framework that integrates local convolutional representations with global transformer embeddings through an adaptive attention-based fusion module. The CNN branch captures fine-grained spatial patterns, the ViT branch encodes long-range contextual dependencies, and the adaptive fusion mechanism learns to weight cross-representation interactions to improve discriminability. To reduce the impact of imbalance, a hybrid objective that combines focal loss with categorical cross-entropy is incorporated during training. Experiments on the IQ-OTH/NCCD dataset (benign, malignant, and normal) show consistent performance progression in an ablation-style evaluation: CNN-only, ViT-only, CNN-ViT concatenation, and AACNN-ViT. The proposed AACNN-ViT achieved 96.97% accuracy on the validation set with macro-averaged precision/recall/F1 of 0.9588/0.9352/0.9458 and weighted F1 of 0.9693, substantially improving minority-class recognition (Benign recall 0.8333) compared with CNN-ViT (accuracy 89.09%, macro-F1 0.7680). One-vs.-rest ROC analysis further indicates strong separability across all classes (micro-average AUC 0.992). These results suggest that adaptive attention-based fusion offers a robust and clinically relevant approach for computer-aided lung cancer screening and decision support.</description>
	<pubDate>2026-01-30</pubDate>

	<content:encoded><![CDATA[
	<p><b>J. Imaging, Vol. 12, Pages 62: AACNN-ViT: Adaptive Attention-Augmented Convolutional and Vision Transformer Fusion for Lung Cancer Detection</b></p>
	<p>Journal of Imaging <a href="https://www.mdpi.com/2313-433X/12/2/62">doi: 10.3390/jimaging12020062</a></p>
	<p>Authors:
		Mohammad Ishtiaque Rahman
		Amrina Rahman
		</p>
	<p>Lung cancer remains a leading cause of cancer-related mortality. Although reliable multiclass classification of lung lesions from CT imaging is essential for early diagnosis, it remains challenging due to subtle inter-class differences, limited sample sizes, and class imbalance. We propose an Adaptive Attention-Augmented Convolutional Neural Network with Vision Transformer (AACNN-ViT), a hybrid framework that integrates local convolutional representations with global transformer embeddings through an adaptive attention-based fusion module. The CNN branch captures fine-grained spatial patterns, the ViT branch encodes long-range contextual dependencies, and the adaptive fusion mechanism learns to weight cross-representation interactions to improve discriminability. To reduce the impact of imbalance, a hybrid objective that combines focal loss with categorical cross-entropy is incorporated during training. Experiments on the IQ-OTH/NCCD dataset (benign, malignant, and normal) show consistent performance progression in an ablation-style evaluation: CNN-only, ViT-only, CNN-ViT concatenation, and AACNN-ViT. The proposed AACNN-ViT achieved 96.97% accuracy on the validation set with macro-averaged precision/recall/F1 of 0.9588/0.9352/0.9458 and weighted F1 of 0.9693, substantially improving minority-class recognition (Benign recall 0.8333) compared with CNN-ViT (accuracy 89.09%, macro-F1 0.7680). One-vs.-rest ROC analysis further indicates strong separability across all classes (micro-average AUC 0.992). These results suggest that adaptive attention-based fusion offers a robust and clinically relevant approach for computer-aided lung cancer screening and decision support.</p>
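	<p>The hybrid objective mentioned above is straightforward to write down: a focal term that down-weights easy examples plus a standard categorical cross-entropy term. The sketch below shows one common formulation; the gamma value, mixing weight, and batch are assumptions, not the paper's exact settings.</p>
	<pre><code>
# Minimal sketch of a focal + cross-entropy hybrid loss (illustrative constants).
import torch
import torch.nn.functional as F

def hybrid_loss(logits, targets, gamma=2.0, alpha=0.5):
    ce = F.cross_entropy(logits, targets, reduction="none")   # per-sample cross-entropy
    pt = torch.exp(-ce)                                        # probability of the true class
    focal = (1.0 - pt) ** gamma * ce                           # down-weights easy examples
    return alpha * focal.mean() + (1.0 - alpha) * ce.mean()

logits = torch.randn(8, 3, requires_grad=True)   # 3 classes: benign / malignant / normal
targets = torch.tensor([0, 1, 2, 1, 1, 2, 0, 1])
loss = hybrid_loss(logits, targets)
loss.backward()
print(float(loss))
</code></pre>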
	]]></content:encoded>

	<dc:title>AACNN-ViT: Adaptive Attention-Augmented Convolutional and Vision Transformer Fusion for Lung Cancer Detection</dc:title>
			<dc:creator>Mohammad Ishtiaque Rahman</dc:creator>
			<dc:creator>Amrina Rahman</dc:creator>
		<dc:identifier>doi: 10.3390/jimaging12020062</dc:identifier>
	<dc:source>Journal of Imaging</dc:source>
	<dc:date>2026-01-30</dc:date>

	<prism:publicationName>Journal of Imaging</prism:publicationName>
	<prism:publicationDate>2026-01-30</prism:publicationDate>
	<prism:volume>12</prism:volume>
	<prism:number>2</prism:number>
	<prism:section>Article</prism:section>
	<prism:startingPage>62</prism:startingPage>
		<prism:doi>10.3390/jimaging12020062</prism:doi>
	<prism:url>https://www.mdpi.com/2313-433X/12/2/62</prism:url>
	
	<cc:license rdf:resource="CC BY 4.0"/>
</item>
        <item rdf:about="https://www.mdpi.com/2313-433X/12/2/61">

	<title>J. Imaging, Vol. 12, Pages 61: Multiscale RGB-Guided Fusion for Hyperspectral Image Super-Resolution</title>
	<link>https://www.mdpi.com/2313-433X/12/2/61</link>
	<description>Hyperspectral imaging (HSI) enables fine spectral analysis but is often limited by low spatial resolution due to sensor constraints. To address this, we propose CGNet, a color-guided hyperspectral super-resolution network that leverages complementary information from low-resolution hyperspectral inputs and high-resolution RGB images. CGNet adopts a dual-encoder design: the RGB encoder extracts hierarchical spatial features, while the HSI encoder progressively upsamples spectral features. A multi-scale fusion decoder then combines both modalities in a coarse-to-fine manner to reconstruct the high-resolution HSI. Training is driven by a hybrid loss that balances L1 and Spectral Angle Mapper (SAM), which ablation studies confirm as the most effective formulation. Experiments on two benchmarks, ARAD1K and StereoMSI, at &amp;amp;times;4 and &amp;amp;times;6 upscaling factors demonstrate that CGNet consistently outperforms state-of-the-art baselines. CGNet achieves higher PSNR and SSIM, lower SAM, and reduced &amp;amp;Delta;E00, confirming its ability to recover sharp spatial structures while preserving spectral fidelity.</description>
	<pubDate>2026-01-28</pubDate>

	<content:encoded><![CDATA[
	<p><b>J. Imaging, Vol. 12, Pages 61: Multiscale RGB-Guided Fusion for Hyperspectral Image Super-Resolution</b></p>
	<p>Journal of Imaging <a href="https://www.mdpi.com/2313-433X/12/2/61">doi: 10.3390/jimaging12020061</a></p>
	<p>Authors:
		Matteo Kolyszko
		Marco Buzzelli
		Simone Bianco
		Raimondo Schettini
		</p>
	<p>Hyperspectral imaging (HSI) enables fine spectral analysis but is often limited by low spatial resolution due to sensor constraints. To address this, we propose CGNet, a color-guided hyperspectral super-resolution network that leverages complementary information from low-resolution hyperspectral inputs and high-resolution RGB images. CGNet adopts a dual-encoder design: the RGB encoder extracts hierarchical spatial features, while the HSI encoder progressively upsamples spectral features. A multi-scale fusion decoder then combines both modalities in a coarse-to-fine manner to reconstruct the high-resolution HSI. Training is driven by a hybrid loss that balances L1 and Spectral Angle Mapper (SAM), which ablation studies confirm as the most effective formulation. Experiments on two benchmarks, ARAD1K and StereoMSI, at &amp;amp;times;4 and &amp;amp;times;6 upscaling factors demonstrate that CGNet consistently outperforms state-of-the-art baselines. CGNet achieves higher PSNR and SSIM, lower SAM, and reduced &amp;amp;Delta;E00, confirming its ability to recover sharp spatial structures while preserving spectral fidelity.</p>
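	<p>The hybrid training objective is simple to sketch: an L1 reconstruction term plus the mean spectral angle between predicted and reference spectra. The weighting factor and tensor shapes below are assumptions for illustration, not CGNet's configuration.</p>
	<pre><code>
# Minimal sketch of an L1 + Spectral Angle Mapper (SAM) loss for HSI super-resolution
# (shapes and weighting are illustrative assumptions).
import torch

def sam_loss(pred, target, eps=1e-8):
    """Mean spectral angle (radians) between predicted and reference spectra per pixel."""
    dot = (pred * target).sum(dim=1)                   # pred/target: (batch, bands, H, W)
    denom = pred.norm(dim=1) * target.norm(dim=1) + eps
    cos = torch.clamp(dot / denom, -1.0 + 1e-7, 1.0 - 1e-7)
    return torch.acos(cos).mean()

def hybrid_loss(pred, target, lam=0.1):
    return torch.nn.functional.l1_loss(pred, target) + lam * sam_loss(pred, target)

pred = torch.rand(2, 31, 64, 64, requires_grad=True)   # 31-band prediction
target = torch.rand(2, 31, 64, 64)
loss = hybrid_loss(pred, target)
loss.backward()
print(float(loss))
</code></pre>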
	]]></content:encoded>

	<dc:title>Multiscale RGB-Guided Fusion for Hyperspectral Image Super-Resolution</dc:title>
			<dc:creator>Matteo Kolyszko</dc:creator>
			<dc:creator>Marco Buzzelli</dc:creator>
			<dc:creator>Simone Bianco</dc:creator>
			<dc:creator>Raimondo Schettini</dc:creator>
		<dc:identifier>doi: 10.3390/jimaging12020061</dc:identifier>
	<dc:source>Journal of Imaging</dc:source>
	<dc:date>2026-01-28</dc:date>

	<prism:publicationName>Journal of Imaging</prism:publicationName>
	<prism:publicationDate>2026-01-28</prism:publicationDate>
	<prism:volume>12</prism:volume>
	<prism:number>2</prism:number>
	<prism:section>Article</prism:section>
	<prism:startingPage>61</prism:startingPage>
		<prism:doi>10.3390/jimaging12020061</prism:doi>
	<prism:url>https://www.mdpi.com/2313-433X/12/2/61</prism:url>
	
	<cc:license rdf:resource="CC BY 4.0"/>
</item>
        <item rdf:about="https://www.mdpi.com/2313-433X/12/2/60">

	<title>J. Imaging, Vol. 12, Pages 60: Real-Time Visual Anomaly Detection in High-Speed Motorsport: An Entropy-Driven Hybrid Retrieval- and Cache-Augmented Architecture</title>
	<link>https://www.mdpi.com/2313-433X/12/2/60</link>
	<description>At 300 km/h, an end-to-end vision delay of 100 ms corresponds to 8.3 m of unobserved travel; therefore, real-time anomaly monitoring must balance sensitivity with strict tail-latency constraints at the edge. We propose a hybrid cache&amp;amp;ndash;retrieval inference architecture for visual anomaly detection in high-speed motorsport that exploits lap-to-lap spatiotemporal redundancy while reserving local similarity retrieval for genuinely uncertain events. The system combines a hierarchical visual encoder (a lightweight backbone with selective refinement via a Nested U-Net for texture-level cues) and an uncertainty-driven router that selects between two memory pathways: (i) a static cache of precomputed scene embeddings for track/background context and (ii) local similarity retrieval over historical telemetry&amp;amp;ndash;vision patterns to ground ambiguous frames, improve interpretability, and stabilize decisions under high uncertainty. Routing is governed by an entropy signal computed from prediction and embedding uncertainty: low-entropy frames follow a cache-first path, whereas high-entropy frames trigger retrieval and refinement to preserve decision stability without sacrificing latency. On a high-fidelity closed-circuit benchmark with synchronized onboard video and telemetry and controlled anomaly injections (tire degradation, suspension chatter, and illumination shifts), the proposed approach reduces mean end-to-end latency to 21.7 ms versus 48.6 ms for a retrieval-only baseline (55.3% reduction) while achieving Macro-F1 = 0.89 at safety-oriented operating points. The framework is designed for passive monitoring and decision support, producing advisory outputs without actuating ECU control strategies.</description>
	<pubDate>2026-01-28</pubDate>

	<content:encoded><![CDATA[
	<p><b>J. Imaging, Vol. 12, Pages 60: Real-Time Visual Anomaly Detection in High-Speed Motorsport: An Entropy-Driven Hybrid Retrieval- and Cache-Augmented Architecture</b></p>
	<p>Journal of Imaging <a href="https://www.mdpi.com/2313-433X/12/2/60">doi: 10.3390/jimaging12020060</a></p>
	<p>Authors:
		Rubén Juárez Cádiz
		Fernando Rodríguez-Sela
		</p>
	<p>At 300 km/h, an end-to-end vision delay of 100 ms corresponds to 8.3 m of unobserved travel; therefore, real-time anomaly monitoring must balance sensitivity with strict tail-latency constraints at the edge. We propose a hybrid cache&amp;amp;ndash;retrieval inference architecture for visual anomaly detection in high-speed motorsport that exploits lap-to-lap spatiotemporal redundancy while reserving local similarity retrieval for genuinely uncertain events. The system combines a hierarchical visual encoder (a lightweight backbone with selective refinement via a Nested U-Net for texture-level cues) and an uncertainty-driven router that selects between two memory pathways: (i) a static cache of precomputed scene embeddings for track/background context and (ii) local similarity retrieval over historical telemetry&amp;amp;ndash;vision patterns to ground ambiguous frames, improve interpretability, and stabilize decisions under high uncertainty. Routing is governed by an entropy signal computed from prediction and embedding uncertainty: low-entropy frames follow a cache-first path, whereas high-entropy frames trigger retrieval and refinement to preserve decision stability without sacrificing latency. On a high-fidelity closed-circuit benchmark with synchronized onboard video and telemetry and controlled anomaly injections (tire degradation, suspension chatter, and illumination shifts), the proposed approach reduces mean end-to-end latency to 21.7 ms versus 48.6 ms for a retrieval-only baseline (55.3% reduction) while achieving Macro-F1 = 0.89 at safety-oriented operating points. The framework is designed for passive monitoring and decision support, producing advisory outputs without actuating ECU control strategies.</p>
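	<p>The routing rule itself reduces to a threshold on prediction entropy. The sketch below shows that decision in isolation; the threshold value and the two path labels are placeholders, not the deployed system's components.</p>
	<pre><code>
# Minimal sketch of entropy-driven routing between a cache path and a retrieval path
# (threshold and probabilities are illustrative assumptions).
import numpy as np

def prediction_entropy(probs, eps=1e-12):
    p = np.clip(np.asarray(probs, dtype=float), eps, 1.0)
    return float(-(p * np.log(p)).sum())

def route(frame_probs, threshold=0.5):
    h = prediction_entropy(frame_probs)
    if h <= threshold:
        return "cache", h        # confident frame: reuse precomputed scene embeddings
    return "retrieval", h        # ambiguous frame: ground it against historical patterns

for probs in ([0.97, 0.02, 0.01],    # clearly nominal frame
              [0.40, 0.35, 0.25]):   # ambiguous frame
    path, h = route(probs)
    print(f"entropy={h:.2f} -> {path} path")
</code></pre>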
	]]></content:encoded>

	<dc:title>Real-Time Visual Anomaly Detection in High-Speed Motorsport: An Entropy-Driven Hybrid Retrieval- and Cache-Augmented Architecture</dc:title>
			<dc:creator>Rubén Juárez Cádiz</dc:creator>
			<dc:creator>Fernando Rodríguez-Sela</dc:creator>
		<dc:identifier>doi: 10.3390/jimaging12020060</dc:identifier>
	<dc:source>Journal of Imaging</dc:source>
	<dc:date>2026-01-28</dc:date>

	<prism:publicationName>Journal of Imaging</prism:publicationName>
	<prism:publicationDate>2026-01-28</prism:publicationDate>
	<prism:volume>12</prism:volume>
	<prism:number>2</prism:number>
	<prism:section>Article</prism:section>
	<prism:startingPage>60</prism:startingPage>
		<prism:doi>10.3390/jimaging12020060</prism:doi>
	<prism:url>https://www.mdpi.com/2313-433X/12/2/60</prism:url>
	
	<cc:license rdf:resource="CC BY 4.0"/>
</item>
        <item rdf:about="https://www.mdpi.com/2313-433X/12/2/59">

	<title>J. Imaging, Vol. 12, Pages 59: Neuro-Geometric Graph Transformers with Differentiable Radiographic Geometry for Spinal X-Ray Image Analysis</title>
	<link>https://www.mdpi.com/2313-433X/12/2/59</link>
	<description>Radiographic imaging remains a cornerstone of diagnostic practice. However, accurate interpretation faces challenges from subtle visual signatures, anatomical variability, and inter-observer inconsistency. Conventional deep learning approaches, such as convolutional neural networks and vision transformers, deliver strong predictive performance but often lack anatomical grounding and interpretability, limiting their trustworthiness in imaging applications. To address these challenges, we present SpineNeuroSym, a neuro-geometric imaging framework that unifies geometry-aware learning and symbolic reasoning for explainable medical image analysis. The framework integrates weakly supervised keypoint and region-of-interest discovery, a dual-stream graph&amp;amp;ndash;transformer backbone, and a Differentiable Radiographic Geometry Module (dRGM) that computes clinically relevant indices (e.g., slip ratio, disc asymmetry, sacroiliac spacing, and curvature measures). A Neuro-Symbolic Constraint Layer (NSCL) enforces monotonic logic in image-derived predictions, while a Counterfactual Geometry Diffusion (CGD) module generates rare imaging phenotypes and provides diagnostic auditing through counterfactual validation. Evaluated on a comprehensive dataset of 1613 spinal radiographs from Sunpasitthiprasong Hospital encompassing six diagnostic categories&amp;amp;mdash;spondylolisthesis (n = 496), infection (n = 322), spondyloarthropathy (n = 275), normal cervical (n = 192), normal thoracic (n = 70), and normal lumbar spine (n = 258)&amp;amp;mdash;SpineNeuroSym achieved 89.4% classification accuracy, a macro-F1 of 0.872, and an AUROC of 0.941, outperforming eight state-of-the-art imaging baselines. These results highlight how integrating neuro-geometric modeling, symbolic constraints, and counterfactual validation advances explainable, trustworthy, and reproducible medical imaging AI, establishing a pathway toward transparent image analysis systems.</description>
	<pubDate>2026-01-28</pubDate>

	<content:encoded><![CDATA[
	<p><b>J. Imaging, Vol. 12, Pages 59: Neuro-Geometric Graph Transformers with Differentiable Radiographic Geometry for Spinal X-Ray Image Analysis</b></p>
	<p>Journal of Imaging <a href="https://www.mdpi.com/2313-433X/12/2/59">doi: 10.3390/jimaging12020059</a></p>
	<p>Authors:
		Vuth Kaveevorayan
		Rapeepan Pitakaso
		Thanatkij Srichok
		Natthapong Nanthasamroeng
		Chutchai Kaewta
		Peerawat Luesak
		</p>
	<p>Radiographic imaging remains a cornerstone of diagnostic practice. However, accurate interpretation faces challenges from subtle visual signatures, anatomical variability, and inter-observer inconsistency. Conventional deep learning approaches, such as convolutional neural networks and vision transformers, deliver strong predictive performance but often lack anatomical grounding and interpretability, limiting their trustworthiness in imaging applications. To address these challenges, we present SpineNeuroSym, a neuro-geometric imaging framework that unifies geometry-aware learning and symbolic reasoning for explainable medical image analysis. The framework integrates weakly supervised keypoint and region-of-interest discovery, a dual-stream graph&amp;amp;ndash;transformer backbone, and a Differentiable Radiographic Geometry Module (dRGM) that computes clinically relevant indices (e.g., slip ratio, disc asymmetry, sacroiliac spacing, and curvature measures). A Neuro-Symbolic Constraint Layer (NSCL) enforces monotonic logic in image-derived predictions, while a Counterfactual Geometry Diffusion (CGD) module generates rare imaging phenotypes and provides diagnostic auditing through counterfactual validation. Evaluated on a comprehensive dataset of 1613 spinal radiographs from Sunpasitthiprasong Hospital encompassing six diagnostic categories&amp;amp;mdash;spondylolisthesis (n = 496), infection (n = 322), spondyloarthropathy (n = 275), normal cervical (n = 192), normal thoracic (n = 70), and normal lumbar spine (n = 258)&amp;amp;mdash;SpineNeuroSym achieved 89.4% classification accuracy, a macro-F1 of 0.872, and an AUROC of 0.941, outperforming eight state-of-the-art imaging baselines. These results highlight how integrating neuro-geometric modeling, symbolic constraints, and counterfactual validation advances explainable, trustworthy, and reproducible medical imaging AI, establishing a pathway toward transparent image analysis systems.</p>
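	<p>One way to picture the differentiable geometry module is a clinically motivated index computed directly from predicted keypoints so that gradients reach the keypoint head. The sketch below computes a Taillard-style slip ratio from three landmark coordinates; the landmark layout and the formula are assumptions chosen for illustration, not the paper's dRGM.</p>
	<pre><code>
# Minimal sketch: a differentiable slip-ratio index from keypoint coordinates
# (landmark layout and formula are illustrative assumptions).
import torch

def slip_ratio(upper_post_inf, lower_post_sup, lower_ant_sup):
    """Posterior offset of the upper vertebra, normalized by the lower endplate width."""
    endplate = lower_ant_sup - lower_post_sup           # vector along the lower endplate
    offset = upper_post_inf - lower_post_sup            # displacement of the upper posterior corner
    width = endplate.norm() + 1e-8
    return torch.dot(offset, endplate / width) / width  # projection, normalized by width

# (x, y) landmarks in image coordinates, e.g., from the keypoint head.
upper_post_inf = torch.tensor([112.0, 200.0], requires_grad=True)
lower_post_sup = torch.tensor([100.0, 210.0])
lower_ant_sup = torch.tensor([160.0, 205.0])

ratio = slip_ratio(upper_post_inf, lower_post_sup, lower_ant_sup)
ratio.backward()                                        # gradients reach the predicted keypoints
print(round(float(ratio), 3), upper_post_inf.grad)
</code></pre>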
	]]></content:encoded>

	<dc:title>Neuro-Geometric Graph Transformers with Differentiable Radiographic Geometry for Spinal X-Ray Image Analysis</dc:title>
			<dc:creator>Vuth Kaveevorayan</dc:creator>
			<dc:creator>Rapeepan Pitakaso</dc:creator>
			<dc:creator>Thanatkij Srichok</dc:creator>
			<dc:creator>Natthapong Nanthasamroeng</dc:creator>
			<dc:creator>Chutchai Kaewta</dc:creator>
			<dc:creator>Peerawat Luesak</dc:creator>
		<dc:identifier>doi: 10.3390/jimaging12020059</dc:identifier>
	<dc:source>Journal of Imaging</dc:source>
	<dc:date>2026-01-28</dc:date>

	<prism:publicationName>Journal of Imaging</prism:publicationName>
	<prism:publicationDate>2026-01-28</prism:publicationDate>
	<prism:volume>12</prism:volume>
	<prism:number>2</prism:number>
	<prism:section>Article</prism:section>
	<prism:startingPage>59</prism:startingPage>
		<prism:doi>10.3390/jimaging12020059</prism:doi>
	<prism:url>https://www.mdpi.com/2313-433X/12/2/59</prism:url>
	
	<cc:license rdf:resource="CC BY 4.0"/>
</item>
        <item rdf:about="https://www.mdpi.com/2313-433X/12/2/58">

	<title>J. Imaging, Vol. 12, Pages 58: SFD-ADNet: Spatial&amp;ndash;Frequency Dual-Domain Adaptive Deformation for Point Cloud Data Augmentation</title>
	<link>https://www.mdpi.com/2313-433X/12/2/58</link>
	<description>Existing 3D point cloud enhancement methods typically rely on artificially designed geometric transformations or local blending strategies, which are prone to introducing illogical deformations, struggle to preserve global structure, and exhibit insufficient adaptability to diverse degradation patterns. To address these limitations, this paper proposes SFD-ADNet&amp;amp;mdash;an adaptive deformation framework based on a dual spatial&amp;amp;ndash;frequency domain. It achieves 3D point cloud augmentation by explicitly learning deformation parameters rather than applying predefined perturbations. By jointly modeling spatial structural dependencies and spectral features, SFD-ADNet generates augmented samples that are both structurally aware and task-relevant. In the spatial domain, a hierarchical sequence encoder coupled with a bidirectional Mamba-based deformation predictor captures long-range geometric dependencies and local structural variations, enabling adaptive position-aware deformation control. In the frequency domain, a multi-scale dual-channel mechanism based on adaptive Chebyshev polynomials separates low-frequency structural components from high-frequency details, allowing the model to suppress noise-sensitive distortions while preserving the global geometric skeleton. The two deformation predictions dynamically fuse to balance structural fidelity and sample diversity. Extensive experiments conducted on ModelNet40-C and ScanObjectNN-C involved synthetic CAD models and real-world scanned point clouds under diverse perturbation conditions. SFD-ADNet, as a universal augmentation module, reduces the mCE metrics of PointNet++ and different backbone networks by over 20%. Experiments demonstrate that SFD-ADNet achieves state-of-the-art robustness while preserving critical geometric structures. Furthermore, models enhanced by SFD-ADNet demonstrate consistently improved robustness against diverse point cloud attacks, validating the efficacy of adaptive space-frequency deformation in robust point cloud learning.</description>
	<pubDate>2026-01-26</pubDate>

	<content:encoded><![CDATA[
	<p><b>J. Imaging, Vol. 12, Pages 58: SFD-ADNet: Spatial&amp;ndash;Frequency Dual-Domain Adaptive Deformation for Point Cloud Data Augmentation</b></p>
	<p>Journal of Imaging <a href="https://www.mdpi.com/2313-433X/12/2/58">doi: 10.3390/jimaging12020058</a></p>
	<p>Authors:
		Jiacheng Bao
		Lingjun Kong
		Wenju Wang
		</p>
	<p>Existing 3D point cloud enhancement methods typically rely on artificially designed geometric transformations or local blending strategies, which are prone to introducing illogical deformations, struggle to preserve global structure, and exhibit insufficient adaptability to diverse degradation patterns. To address these limitations, this paper proposes SFD-ADNet&amp;amp;mdash;an adaptive deformation framework based on a dual spatial&amp;amp;ndash;frequency domain. It achieves 3D point cloud augmentation by explicitly learning deformation parameters rather than applying predefined perturbations. By jointly modeling spatial structural dependencies and spectral features, SFD-ADNet generates augmented samples that are both structurally aware and task-relevant. In the spatial domain, a hierarchical sequence encoder coupled with a bidirectional Mamba-based deformation predictor captures long-range geometric dependencies and local structural variations, enabling adaptive position-aware deformation control. In the frequency domain, a multi-scale dual-channel mechanism based on adaptive Chebyshev polynomials separates low-frequency structural components from high-frequency details, allowing the model to suppress noise-sensitive distortions while preserving the global geometric skeleton. The two deformation predictions dynamically fuse to balance structural fidelity and sample diversity. Extensive experiments conducted on ModelNet40-C and ScanObjectNN-C involved synthetic CAD models and real-world scanned point clouds under diverse perturbation conditions. SFD-ADNet, as a universal augmentation module, reduces the mCE metrics of PointNet++ and different backbone networks by over 20%. Experiments demonstrate that SFD-ADNet achieves state-of-the-art robustness while preserving critical geometric structures. Furthermore, models enhanced by SFD-ADNet demonstrate consistently improved robustness against diverse point cloud attacks, validating the efficacy of adaptive space-frequency deformation in robust point cloud learning.</p>
	]]></content:encoded>

	<dc:title>SFD-ADNet: Spatial&amp;ndash;Frequency Dual-Domain Adaptive Deformation for Point Cloud Data Augmentation</dc:title>
			<dc:creator>Jiacheng Bao</dc:creator>
			<dc:creator>Lingjun Kong</dc:creator>
			<dc:creator>Wenju Wang</dc:creator>
		<dc:identifier>doi: 10.3390/jimaging12020058</dc:identifier>
	<dc:source>Journal of Imaging</dc:source>
	<dc:date>2026-01-26</dc:date>

	<prism:publicationName>Journal of Imaging</prism:publicationName>
	<prism:publicationDate>2026-01-26</prism:publicationDate>
	<prism:volume>12</prism:volume>
	<prism:number>2</prism:number>
	<prism:section>Article</prism:section>
	<prism:startingPage>58</prism:startingPage>
		<prism:doi>10.3390/jimaging12020058</prism:doi>
	<prism:url>https://www.mdpi.com/2313-433X/12/2/58</prism:url>
	
	<cc:license rdf:resource="CC BY 4.0"/>
</item>
        <item rdf:about="https://www.mdpi.com/2313-433X/12/2/57">

	<title>J. Imaging, Vol. 12, Pages 57: CauseHSI: Counterfactual-Augmented Domain Generalization for Hyperspectral Image Classification via Causal Disentanglement</title>
	<link>https://www.mdpi.com/2313-433X/12/2/57</link>
	<description>Cross-scene hyperspectral image (HSI) classification under single-source domain generalization (DG) is a crucial yet challenging task in remote sensing. The core difficulty lies in generalizing from a limited source domain to unseen target scenes. We formalize this through the causal theory, where different sensing scenes are viewed as distinct interventions on a shared physical system. This perspective reveals two fundamental obstacles: interventional distribution shifts arising from varying acquisition conditions, and confounding biases induced by spurious correlations driven by domain-specific factors. Taking the above considerations into account, we propose CauseHSI, a causality-inspired framework that offers new insights into cross-scene HSI classification. CauseHSI consists of two key components: a Counterfactual Generation Module (CGM) that perturbs domain-specific factors to generate diverse counterfactual variants, simulating cross-domain interventions while preserving semantic consistency, and a Causal Disentanglement Module (CDM) that separates invariant causal semantics from spurious correlations through structured constraints under a structural causal model, ultimately guiding the model to focus on domain-invariant and generalizable representations. By aligning model learning with causal principles, CauseHSI enhances robustness against domain shifts. Extensive experiments on the Pavia, Houston, and HyRANK datasets demonstrate that CauseHSI outperforms existing DG methods.</description>
	<pubDate>2026-01-26</pubDate>

	<content:encoded><![CDATA[
	<p><b>J. Imaging, Vol. 12, Pages 57: CauseHSI: Counterfactual-Augmented Domain Generalization for Hyperspectral Image Classification via Causal Disentanglement</b></p>
	<p>Journal of Imaging <a href="https://www.mdpi.com/2313-433X/12/2/57">doi: 10.3390/jimaging12020057</a></p>
	<p>Authors:
		Xin Li
		Zongchi Yang
		Wenlong Li
		</p>
	<p>Cross-scene hyperspectral image (HSI) classification under single-source domain generalization (DG) is a crucial yet challenging task in remote sensing. The core difficulty lies in generalizing from a limited source domain to unseen target scenes. We formalize this through the causal theory, where different sensing scenes are viewed as distinct interventions on a shared physical system. This perspective reveals two fundamental obstacles: interventional distribution shifts arising from varying acquisition conditions, and confounding biases induced by spurious correlations driven by domain-specific factors. Taking the above considerations into account, we propose CauseHSI, a causality-inspired framework that offers new insights into cross-scene HSI classification. CauseHSI consists of two key components: a Counterfactual Generation Module (CGM) that perturbs domain-specific factors to generate diverse counterfactual variants, simulating cross-domain interventions while preserving semantic consistency, and a Causal Disentanglement Module (CDM) that separates invariant causal semantics from spurious correlations through structured constraints under a structural causal model, ultimately guiding the model to focus on domain-invariant and generalizable representations. By aligning model learning with causal principles, CauseHSI enhances robustness against domain shifts. Extensive experiments on the Pavia, Houston, and HyRANK datasets demonstrate that CauseHSI outperforms existing DG methods.</p>
	]]></content:encoded>

	<dc:title>CauseHSI: Counterfactual-Augmented Domain Generalization for Hyperspectral Image Classification via Causal Disentanglement</dc:title>
			<dc:creator>Xin Li</dc:creator>
			<dc:creator>Zongchi Yang</dc:creator>
			<dc:creator>Wenlong Li</dc:creator>
		<dc:identifier>doi: 10.3390/jimaging12020057</dc:identifier>
	<dc:source>Journal of Imaging</dc:source>
	<dc:date>2026-01-26</dc:date>

	<prism:publicationName>Journal of Imaging</prism:publicationName>
	<prism:publicationDate>2026-01-26</prism:publicationDate>
	<prism:volume>12</prism:volume>
	<prism:number>2</prism:number>
	<prism:section>Article</prism:section>
	<prism:startingPage>57</prism:startingPage>
		<prism:doi>10.3390/jimaging12020057</prism:doi>
	<prism:url>https://www.mdpi.com/2313-433X/12/2/57</prism:url>
	
	<cc:license rdf:resource="CC BY 4.0"/>
</item>
        <item rdf:about="https://www.mdpi.com/2313-433X/12/2/56">

	<title>J. Imaging, Vol. 12, Pages 56: Use of Patient-Specific 3D Models in Paediatric Surgery: Effect on Communication and Surgical Management</title>
	<link>https://www.mdpi.com/2313-433X/12/2/56</link>
	<description>Children with rare tumours and malformations may benefit from innovative imaging, including patient-specific 3D models that can enhance communication and surgical planning. The primary aim was to evaluate the impact of patient-specific 3D models on communication with families. The secondary aims were to assess their influence on medical management and to establish an efficient post-processing workflow. From 2021 to 2024, we prospectively included patients aged 3 months to 18 years with rare tumours or malformations. Families completed questionnaires before and after the presentation of a 3D model generated from MRI sequences, including peripheral nerve tractography. Treating physicians completed a separate questionnaire before surgical planning. Analyses were performed in R. Among 21 patients, diagnoses included 11 tumours, 8 malformations, 1 trauma, and 1 pancreatic pseudo-cyst. Likert scale responses showed improved family understanding after viewing the 3D model (mean score 3.94 to 4.67) and a high overall evaluation (mean 4.61). Physicians also rated the models positively. An efficient image post-processing workflow was defined. Although manual 3D reconstruction remains time-consuming, these preliminary results show that colourful, patient-specific 3D models substantially improve family communication and support clinical decision-making. They also highlight the need to support the development of MRI-based automated segmentation software using deep neural networks that is clinically approved and usable in routine practice.</description>
	<pubDate>2026-01-26</pubDate>

	<content:encoded><![CDATA[
	<p><b>J. Imaging, Vol. 12, Pages 56: Use of Patient-Specific 3D Models in Paediatric Surgery: Effect on Communication and Surgical Management</b></p>
	<p>Journal of Imaging <a href="https://www.mdpi.com/2313-433X/12/2/56">doi: 10.3390/jimaging12020056</a></p>
	<p>Authors:
		Cécile O. Muller
		Lydia Helbling
		Theodoros Xydias
		Jeanette Greiner
		Valérie Oesch
		Henrik Köhler
		Tim Ohletz
		Jatta Berberat
		</p>
	<p>Children with rare tumours and malformations may benefit from innovative imaging, including patient-specific 3D models that can enhance communication and surgical planning. The primary aim was to evaluate the impact of patient-specific 3D models on communication with families. The secondary aims were to assess their influence on medical management and to establish an efficient post-processing workflow. From 2021 to 2024, we prospectively included patients aged 3 months to 18 years with rare tumours or malformations. Families completed questionnaires before and after the presentation of a 3D model generated from MRI sequences, including peripheral nerve tractography. Treating physicians completed a separate questionnaire before surgical planning. Analyses were performed in R. Among 21 patients, diagnoses included 11 tumours, 8 malformations, 1 trauma, and 1 pancreatic pseudo-cyst. Likert scale responses showed improved family understanding after viewing the 3D model (mean score 3.94 to 4.67) and a high overall evaluation (mean 4.61). Physicians also rated the models positively. An efficient image post-processing workflow was defined. Although manual 3D reconstruction remains time-consuming, these preliminary results show that colourful, patient-specific 3D models substantially improve family communication and support clinical decision-making. They also highlight the need to support the development of MRI-based automated segmentation software using deep neural networks that is clinically approved and usable in routine practice.</p>
	]]></content:encoded>

	<dc:title>Use of Patient-Specific 3D Models in Paediatric Surgery: Effect on Communication and Surgical Management</dc:title>
			<dc:creator>Cécile O. Muller</dc:creator>
			<dc:creator>Lydia Helbling</dc:creator>
			<dc:creator>Theodoros Xydias</dc:creator>
			<dc:creator>Jeanette Greiner</dc:creator>
			<dc:creator>Valérie Oesch</dc:creator>
			<dc:creator>Henrik Köhler</dc:creator>
			<dc:creator>Tim Ohletz</dc:creator>
			<dc:creator>Jatta Berberat</dc:creator>
		<dc:identifier>doi: 10.3390/jimaging12020056</dc:identifier>
	<dc:source>Journal of Imaging</dc:source>
	<dc:date>2026-01-26</dc:date>

	<prism:publicationName>Journal of Imaging</prism:publicationName>
	<prism:publicationDate>2026-01-26</prism:publicationDate>
	<prism:volume>12</prism:volume>
	<prism:number>2</prism:number>
	<prism:section>Article</prism:section>
	<prism:startingPage>56</prism:startingPage>
		<prism:doi>10.3390/jimaging12020056</prism:doi>
	<prism:url>https://www.mdpi.com/2313-433X/12/2/56</prism:url>
	
	<cc:license rdf:resource="CC BY 4.0"/>
</item>
        <item rdf:about="https://www.mdpi.com/2313-433X/12/2/55">

	<title>J. Imaging, Vol. 12, Pages 55: Capacity-Limited Failure in Approximate Nearest Neighbor Search on Image Embedding Spaces</title>
	<link>https://www.mdpi.com/2313-433X/12/2/55</link>
	<description>Similarity search on image embeddings is a common practice for image retrieval in machine learning and pattern recognition systems. Approximate nearest neighbor (ANN) methods enable scalable similarity search on large datasets, often approaching sub-linear complexity. Yet, little empirical work has examined how ANN neighborhood geometry differs from that of exact k-nearest neighbors (k-NN) search as the neighborhood size increases under constrained search effort. This study quantifies how approximate neighborhood structure changes relative to exact k-NN search as k increases across three experimental conditions. Using multiple random subsets of 10,000 images drawn from the STL-10 dataset, we compute ResNet-50 image embeddings, perform an exact k-NN search, and compare it to a Hierarchical Navigable Small World (HNSW)-based ANN search under controlled hyperparameter regimes. We evaluated the fidelity of neighborhood structure using neighborhood overlap, average neighbor distance, normalized barycenter shift, and local intrinsic dimensionality (LID). Results show that exact k-NN and ANN search behave nearly identically when efSearch &gt; k. However, as the neighborhood size grows and efSearch remains fixed, ANN search fails abruptly, exhibiting extreme divergence in neighbor distances at approximately k ≈ 2–3.5 × efSearch. Increasing index construction quality delays this failure, and scaling search effort proportionally with neighborhood size (efSearch = α × k with α ≥ 1) preserves neighborhood geometry across all evaluated metrics, including LID. The findings indicate that ANN search preserves neighborhood geometry within its operational capacity but abruptly fails when this capacity is exceeded. Documenting this behavior is relevant for scientific applications that approximate embedding spaces and provides practical guidance on when ANN search is interchangeable with exact k-NN and when geometric differences become nontrivial.</description>
	<pubDate>2026-01-25</pubDate>

	<content:encoded><![CDATA[
	<p><b>J. Imaging, Vol. 12, Pages 55: Capacity-Limited Failure in Approximate Nearest Neighbor Search on Image Embedding Spaces</b></p>
	<p>Journal of Imaging <a href="https://www.mdpi.com/2313-433X/12/2/55">doi: 10.3390/jimaging12020055</a></p>
	<p>Authors:
		Morgan Roy Cooper
		Mike Busch
		</p>
	<p>Similarity search on image embeddings is a common practice for image retrieval in machine learning and pattern recognition systems. Approximate nearest neighbor (ANN) methods enable scalable similarity search on large datasets, often approaching sub-linear complexity. Yet, little empirical work has examined how ANN neighborhood geometry differs from that of exact k-nearest neighbors (k-NN) search as the neighborhood size increases under constrained search effort. This study quantifies how approximate neighborhood structure changes relative to exact k-NN search as k increases across three experimental conditions. Using multiple random subsets of 10,000 images drawn from the STL-10 dataset, we compute ResNet-50 image embeddings, perform an exact k-NN search, and compare it to a Hierarchical Navigable Small World (HNSW)-based ANN search under controlled hyperparameter regimes. We evaluated the fidelity of neighborhood structure using neighborhood overlap, average neighbor distance, normalized barycenter shift, and local intrinsic dimensionality (LID). Results show that exact k-NN and ANN search behave nearly identically when efSearch &gt; k. However, as the neighborhood size grows and efSearch remains fixed, ANN search fails abruptly, exhibiting extreme divergence in neighbor distances at approximately k ≈ 2–3.5 × efSearch. Increasing index construction quality delays this failure, and scaling search effort proportionally with neighborhood size (efSearch = α × k with α ≥ 1) preserves neighborhood geometry across all evaluated metrics, including LID. The findings indicate that ANN search preserves neighborhood geometry within its operational capacity but abruptly fails when this capacity is exceeded. Documenting this behavior is relevant for scientific applications that approximate embedding spaces and provides practical guidance on when ANN search is interchangeable with exact k-NN and when geometric differences become nontrivial.</p>
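	<p>As a concrete illustration of the efSearch scaling discussed above, the minimal Python sketch below builds an HNSW index with hnswlib, runs an exact k-NN baseline with scikit-learn, and reports the neighborhood-overlap metric. The random vectors stand in for the ResNet-50 embeddings, and the parameter values (M, ef_construction, alpha) are illustrative assumptions rather than the authors' settings.</p>
	<pre><code>
import numpy as np
import hnswlib
from sklearn.neighbors import NearestNeighbors

rng = np.random.default_rng(0)
data = rng.standard_normal((10_000, 2048)).astype(np.float32)   # stand-in for ResNet-50 embeddings
queries = data[:100]

k, alpha = 200, 1.0                       # neighborhood size and efSearch scaling factor
ef_search = max(int(alpha * k), k)        # keep efSearch at least k, as recommended above

# Exact k-NN baseline
exact = NearestNeighbors(n_neighbors=k).fit(data)
_, exact_ids = exact.kneighbors(queries)

# HNSW approximate search
index = hnswlib.Index(space="l2", dim=data.shape[1])
index.init_index(max_elements=len(data), ef_construction=200, M=16)
index.add_items(data, np.arange(len(data)))
index.set_ef(ef_search)
ann_ids, _ = index.knn_query(queries, k=k)

# Neighborhood overlap: fraction of exact neighbors recovered by the ANN search
overlap = np.mean([len(set(a).intersection(b)) / k for a, b in zip(ann_ids, exact_ids)])
print(f"mean neighborhood overlap at k={k}, efSearch={ef_search}: {overlap:.3f}")
</code></pre>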
	]]></content:encoded>

	<dc:title>Capacity-Limited Failure in Approximate Nearest Neighbor Search on Image Embedding Spaces</dc:title>
			<dc:creator>Morgan Roy Cooper</dc:creator>
			<dc:creator>Mike Busch</dc:creator>
		<dc:identifier>doi: 10.3390/jimaging12020055</dc:identifier>
	<dc:source>Journal of Imaging</dc:source>
	<dc:date>2026-01-25</dc:date>

	<prism:publicationName>Journal of Imaging</prism:publicationName>
	<prism:publicationDate>2026-01-25</prism:publicationDate>
	<prism:volume>12</prism:volume>
	<prism:number>2</prism:number>
	<prism:section>Article</prism:section>
	<prism:startingPage>55</prism:startingPage>
		<prism:doi>10.3390/jimaging12020055</prism:doi>
	<prism:url>https://www.mdpi.com/2313-433X/12/2/55</prism:url>
	
	<cc:license rdf:resource="CC BY 4.0"/>
</item>
        <item rdf:about="https://www.mdpi.com/2313-433X/12/2/54">

	<title>J. Imaging, Vol. 12, Pages 54: A Robust Skeletonization Method for High-Density Fringe Patterns in Holographic Interferometry Based on Parametric Modeling and Strip Integration</title>
	<link>https://www.mdpi.com/2313-433X/12/2/54</link>
	<description>Accurate displacement field measurement by holographic interferometry requires robust analysis of high-density fringe patterns, which is hindered by speckle noise inherent in any interferogram, no matter how perfect. Conventional skeletonization methods, such as edge detection algorithms and active contour models, often fail under these conditions, producing fragmented and unreliable fringe contours. This paper presents a novel skeletonization procedure that simultaneously addresses three fundamental challenges: (1) topology preservation – by representing the fringe family within a physics-informed, finite-dimensional parametric subspace (e.g., Fourier-based contours), ensuring global smoothness, connectivity, and correct nesting of each fringe; (2) extreme noise robustness – through a robust strip integration functional that replaces noisy point sampling with Gaussian-weighted intensity averaging across a narrow strip, effectively suppressing speckle while yielding a smooth objective function suitable for gradient-based optimization; and (3) sub-pixel accuracy without phase extraction – leveraging continuous bicubic interpolation within a recursive quasi-optimization framework that exploits fringe similarity for precise and stable contour localization. The method’s performance is quantitatively validated on synthetic interferograms with controlled noise, demonstrating significantly lower error compared to baseline techniques. Practical utility is confirmed by successful processing of a real interferogram of a bent plate containing over 100 fringes, enabling precise displacement field reconstruction that closely matches independent theoretical modeling. The proposed procedure provides a reliable tool for processing challenging interferograms where traditional methods fail to deliver satisfactory results.</description>
	<pubDate>2026-01-24</pubDate>

	<content:encoded><![CDATA[
	<p><b>J. Imaging, Vol. 12, Pages 54: A Robust Skeletonization Method for High-Density Fringe Patterns in Holographic Interferometry Based on Parametric Modeling and Strip Integration</b></p>
	<p>Journal of Imaging <a href="https://www.mdpi.com/2313-433X/12/2/54">doi: 10.3390/jimaging12020054</a></p>
	<p>Authors:
		Sergey Lychev
		Alexander Digilov
		</p>
	<p>Accurate displacement field measurement by holographic interferometry requires robust analysis of high-density fringe patterns, which is hindered by speckle noise inherent in any interferogram, no matter how perfect. Conventional skeletonization methods, such as edge detection algorithms and active contour models, often fail under these conditions, producing fragmented and unreliable fringe contours. This paper presents a novel skeletonization procedure that simultaneously addresses three fundamental challenges: (1) topology preservation – by representing the fringe family within a physics-informed, finite-dimensional parametric subspace (e.g., Fourier-based contours), ensuring global smoothness, connectivity, and correct nesting of each fringe; (2) extreme noise robustness – through a robust strip integration functional that replaces noisy point sampling with Gaussian-weighted intensity averaging across a narrow strip, effectively suppressing speckle while yielding a smooth objective function suitable for gradient-based optimization; and (3) sub-pixel accuracy without phase extraction – leveraging continuous bicubic interpolation within a recursive quasi-optimization framework that exploits fringe similarity for precise and stable contour localization. The method’s performance is quantitatively validated on synthetic interferograms with controlled noise, demonstrating significantly lower error compared to baseline techniques. Practical utility is confirmed by successful processing of a real interferogram of a bent plate containing over 100 fringes, enabling precise displacement field reconstruction that closely matches independent theoretical modeling. The proposed procedure provides a reliable tool for processing challenging interferograms where traditional methods fail to deliver satisfactory results.</p>
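	<p>The following Python sketch illustrates the idea of a strip integration functional under stated assumptions: intensities are sampled along offsets taken on the local normal of a parametric contour, interpolated with cubic splines (scipy.ndimage.map_coordinates, as a stand-in for the bicubic interpolation used in the paper), and combined with Gaussian weights across the strip. The contour representation, strip width, and function name are placeholders, not the authors' implementation.</p>
	<pre><code>
import numpy as np
from scipy.ndimage import map_coordinates

def strip_integral(image, contour_xy, half_width=3.0, n_offsets=7, sigma=1.5):
    """Average image intensity over a narrow strip around a parametric contour.

    contour_xy : (N, 2) array of (x, y) samples along one candidate fringe contour.
    """
    contour_xy = np.asarray(contour_xy, dtype=float)
    # unit tangents and normals along the contour
    d = np.gradient(contour_xy, axis=0)
    d /= np.linalg.norm(d, axis=1, keepdims=True) + 1e-12
    normals = np.stack([-d[:, 1], d[:, 0]], axis=1)

    offsets = np.linspace(-half_width, half_width, n_offsets)
    weights = np.exp(-0.5 * (offsets / sigma) ** 2)
    weights /= weights.sum()

    total = 0.0
    for w, off in zip(weights, offsets):
        pts = contour_xy + off * normals              # shift the contour across the strip
        rows, cols = pts[:, 1], pts[:, 0]             # map_coordinates expects (row, col)
        vals = map_coordinates(image, [rows, cols], order=3, mode="nearest")
        total += w * vals.mean()
    return total  # smooth objective: maximise on bright fringes, minimise on dark ones
</code></pre>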
	]]></content:encoded>

	<dc:title>A Robust Skeletonization Method for High-Density Fringe Patterns in Holographic Interferometry Based on Parametric Modeling and Strip Integration</dc:title>
			<dc:creator>Sergey Lychev</dc:creator>
			<dc:creator>Alexander Digilov</dc:creator>
		<dc:identifier>doi: 10.3390/jimaging12020054</dc:identifier>
	<dc:source>Journal of Imaging</dc:source>
	<dc:date>2026-01-24</dc:date>

	<prism:publicationName>Journal of Imaging</prism:publicationName>
	<prism:publicationDate>2026-01-24</prism:publicationDate>
	<prism:volume>12</prism:volume>
	<prism:number>2</prism:number>
	<prism:section>Article</prism:section>
	<prism:startingPage>54</prism:startingPage>
		<prism:doi>10.3390/jimaging12020054</prism:doi>
	<prism:url>https://www.mdpi.com/2313-433X/12/2/54</prism:url>
	
	<cc:license rdf:resource="CC BY 4.0"/>
</item>
        <item rdf:about="https://www.mdpi.com/2313-433X/12/1/53">

	<title>J. Imaging, Vol. 12, Pages 53: Non-Invasive Detection of Prostate Cancer with Novel Time-Dependent Diffusion MRI and AI-Enhanced Quantitative Radiological Interpretation: PROS-TD-AI</title>
	<link>https://www.mdpi.com/2313-433X/12/1/53</link>
	<description>Prostate cancer (PCa) is the most common malignancy in men worldwide. Multiparametric MRI (mpMRI) improves the detection of clinically significant PCa (csPCa); however, it remains limited by false-positive findings and inter-observer variability. Time-dependent diffusion (TDD) MRI provides microstructural information that may enhance csPCa characterization beyond standard mpMRI. This prospective observational diagnostic accuracy study protocol describes the evaluation of PROS-TD-AI, an in-house developed AI workflow integrating TDD-derived metrics for zone-aware csPCa risk prediction. PROS-TD-AI will be compared with PI-RADS v2.1 in routine clinical imaging using MRI-targeted prostate biopsy as the reference standard.</description>
	<pubDate>2026-01-22</pubDate>

	<content:encoded><![CDATA[
	<p><b>J. Imaging, Vol. 12, Pages 53: Non-Invasive Detection of Prostate Cancer with Novel Time-Dependent Diffusion MRI and AI-Enhanced Quantitative Radiological Interpretation: PROS-TD-AI</b></p>
	<p>Journal of Imaging <a href="https://www.mdpi.com/2313-433X/12/1/53">doi: 10.3390/jimaging12010053</a></p>
	<p>Authors:
		Baltasar Ramos
		Cristian Garrido
		Paulette Narváez
		Santiago Gelerstein Claro
		Haotian Li
		Rafael Salvador
		Constanza Vásquez-Venegas
		Iván Gallegos
		Víctor Castañeda
		Cristian Acevedo
		Gonzalo Cárdenas
		Camilo G. Sotomayor
		</p>
	<p>Prostate cancer (PCa) is the most common malignancy in men worldwide. Multiparametric MRI (mpMRI) improves the detection of clinically significant PCa (csPCa); however, it remains limited by false-positive findings and inter-observer variability. Time-dependent diffusion (TDD) MRI provides microstructural information that may enhance csPCa characterization beyond standard mpMRI. This prospective observational diagnostic accuracy study protocol describes the evaluation of PROS-TD-AI, an in-house developed AI workflow integrating TDD-derived metrics for zone-aware csPCa risk prediction. PROS-TD-AI will be compared with PI-RADS v2.1 in routine clinical imaging using MRI-targeted prostate biopsy as the reference standard.</p>
	]]></content:encoded>

	<dc:title>Non-Invasive Detection of Prostate Cancer with Novel Time-Dependent Diffusion MRI and AI-Enhanced Quantitative Radiological Interpretation: PROS-TD-AI</dc:title>
			<dc:creator>Baltasar Ramos</dc:creator>
			<dc:creator>Cristian Garrido</dc:creator>
			<dc:creator>Paulette Narváez</dc:creator>
			<dc:creator>Santiago Gelerstein Claro</dc:creator>
			<dc:creator>Haotian Li</dc:creator>
			<dc:creator>Rafael Salvador</dc:creator>
			<dc:creator>Constanza Vásquez-Venegas</dc:creator>
			<dc:creator>Iván Gallegos</dc:creator>
			<dc:creator>Víctor Castañeda</dc:creator>
			<dc:creator>Cristian Acevedo</dc:creator>
			<dc:creator>Gonzalo Cárdenas</dc:creator>
			<dc:creator>Camilo G. Sotomayor</dc:creator>
		<dc:identifier>doi: 10.3390/jimaging12010053</dc:identifier>
	<dc:source>Journal of Imaging</dc:source>
	<dc:date>2026-01-22</dc:date>

	<prism:publicationName>Journal of Imaging</prism:publicationName>
	<prism:publicationDate>2026-01-22</prism:publicationDate>
	<prism:volume>12</prism:volume>
	<prism:number>1</prism:number>
	<prism:section>Study Protocol</prism:section>
	<prism:startingPage>53</prism:startingPage>
		<prism:doi>10.3390/jimaging12010053</prism:doi>
	<prism:url>https://www.mdpi.com/2313-433X/12/1/53</prism:url>
	
	<cc:license rdf:resource="CC BY 4.0"/>
</item>
        <item rdf:about="https://www.mdpi.com/2313-433X/12/1/52">

	<title>J. Imaging, Vol. 12, Pages 52: Multi-Frequency GPR Image Fusion Based on Convolutional Sparse Representation to Enhance Road Detection</title>
	<link>https://www.mdpi.com/2313-433X/12/1/52</link>
	<description>Single-frequency ground penetrating radar (GPR) systems are fundamentally constrained by a trade-off between penetration depth and resolution, alongside issues like narrow bandwidth and ringing interference. To break this limitation, we have developed a multi-frequency data fusion technique grounded in convolutional sparse representation (CSR). The proposed methodology involves spatially registering multi-frequency GPR signals and fusing them via a CSR framework, where the convolutional dictionaries are derived from simulated high-definition GPR data. Extensive evaluation using information entropy, average gradient, mutual information, and visual information fidelity demonstrates the superiority of our method over traditional fusion approaches (e.g., weighted average, PCA, 2D wavelets). Tests on simulated and real data confirm that our CSR-based fusion successfully synergizes the deep penetration of low frequencies with the fine resolution of high frequencies, leading to substantial gains in GPR image clarity and interpretability.</description>
	<pubDate>2026-01-22</pubDate>

	<content:encoded><![CDATA[
	<p><b>J. Imaging, Vol. 12, Pages 52: Multi-Frequency GPR Image Fusion Based on Convolutional Sparse Representation to Enhance Road Detection</b></p>
	<p>Journal of Imaging <a href="https://www.mdpi.com/2313-433X/12/1/52">doi: 10.3390/jimaging12010052</a></p>
	<p>Authors:
		Liang Fang
		Feng Yang
		Yuanjing Fang
		Junli Nie
		</p>
	<p>Single-frequency ground penetrating radar (GPR) systems are fundamentally constrained by a trade-off between penetration depth and resolution, alongside issues like narrow bandwidth and ringing interference. To break this limitation, we have developed a multi-frequency data fusion technique grounded in convolutional sparse representation (CSR). The proposed methodology involves spatially registering multi-frequency GPR signals and fusing them via a CSR framework, where the convolutional dictionaries are derived from simulated high-definition GPR data. Extensive evaluation using information entropy, average gradient, mutual information, and visual information fidelity demonstrates the superiority of our method over traditional fusion approaches (e.g., weighted average, PCA, 2D wavelets). Tests on simulated and real data confirm that our CSR-based fusion successfully synergizes the deep penetration of low frequencies with the fine resolution of high frequencies, leading to substantial gains in GPR image clarity and interpretability.</p>
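	<p>Two of the evaluation metrics named above, information entropy and average gradient, have common definitions that the short sketch below implements in Python; the exact variants used in the paper may differ, and the arrays here are synthetic stand-ins for fused and single-frequency B-scans.</p>
	<pre><code>
import numpy as np

def information_entropy(img, bins=256):
    """Shannon entropy of the grey-level histogram (bits), a common fusion-quality metric."""
    hist, _ = np.histogram(img, bins=bins, range=(img.min(), img.max()))
    p = hist / hist.sum()
    p = p[p > 0]
    return float(-(p * np.log2(p)).sum())

def average_gradient(img):
    """Mean gradient magnitude, often read as a proxy for spatial detail and sharpness."""
    gy, gx = np.gradient(img.astype(float))
    return float(np.mean(np.sqrt((gx ** 2 + gy ** 2) / 2.0)))

# Toy comparison between a "fused" B-scan and one of its single-frequency inputs
rng = np.random.default_rng(0)
low_freq = rng.normal(size=(128, 256))
fused = low_freq + 0.5 * rng.normal(size=(128, 256))
print(information_entropy(fused), average_gradient(fused))
print(information_entropy(low_freq), average_gradient(low_freq))
</code></pre>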
	]]></content:encoded>

	<dc:title>Multi-Frequency GPR Image Fusion Based on Convolutional Sparse Representation to Enhance Road Detection</dc:title>
			<dc:creator>Liang Fang</dc:creator>
			<dc:creator>Feng Yang</dc:creator>
			<dc:creator>Yuanjing Fang</dc:creator>
			<dc:creator>Junli Nie</dc:creator>
		<dc:identifier>doi: 10.3390/jimaging12010052</dc:identifier>
	<dc:source>Journal of Imaging</dc:source>
	<dc:date>2026-01-22</dc:date>

	<prism:publicationName>Journal of Imaging</prism:publicationName>
	<prism:publicationDate>2026-01-22</prism:publicationDate>
	<prism:volume>12</prism:volume>
	<prism:number>1</prism:number>
	<prism:section>Article</prism:section>
	<prism:startingPage>52</prism:startingPage>
		<prism:doi>10.3390/jimaging12010052</prism:doi>
	<prism:url>https://www.mdpi.com/2313-433X/12/1/52</prism:url>
	
	<cc:license rdf:resource="CC BY 4.0"/>
</item>
        <item rdf:about="https://www.mdpi.com/2313-433X/12/1/51">

	<title>J. Imaging, Vol. 12, Pages 51: Interpretable Diagnosis of Pulmonary Emphysema on Low-Dose CT Using ResNet Embeddings</title>
	<link>https://www.mdpi.com/2313-433X/12/1/51</link>
	<description>Accurate and interpretable detection of pulmonary emphysema on low-dose computed tomography (LDCT) remains a critical challenge for large-scale screening and population health studies. This work proposes a quality-controlled and interpretable deep learning pipeline for emphysema assessment using ResNet-152 embeddings. The pipeline integrates automated lung segmentation, quality-control filtering, and extraction of 2048-dimensional embeddings from mid-lung patches, followed by analysis using logistic regression, LASSO, and recursive feature elimination (RFE). The embeddings are further fused with quantitative CT (QCT) markers, including %LAA, Perc15, and total lung volume (TLV), to enhance robustness and interpretability. Bootstrapped validation demonstrates strong diagnostic performance (ROC-AUC = 0.996, PR-AUC = 0.962, balanced accuracy = 0.931) with low computational cost. The proposed approach shows that ResNet embeddings pretrained on CT data can be effectively reused without retraining for emphysema characterization, providing a reproducible and explainable framework suitable for research and screening support in population-level LDCT analysis.</description>
	<pubDate>2026-01-21</pubDate>

	<content:encoded><![CDATA[
	<p><b>J. Imaging, Vol. 12, Pages 51: Interpretable Diagnosis of Pulmonary Emphysema on Low-Dose CT Using ResNet Embeddings</b></p>
	<p>Journal of Imaging <a href="https://www.mdpi.com/2313-433X/12/1/51">doi: 10.3390/jimaging12010051</a></p>
	<p>Authors:
		Talshyn Sarsembayeva
		Madina Mansurova
		Ainash Oshibayeva
		Stepan Serebryakov
		</p>
	<p>Accurate and interpretable detection of pulmonary emphysema on low-dose computed tomography (LDCT) remains a critical challenge for large-scale screening and population health studies. This work proposes a quality-controlled and interpretable deep learning pipeline for emphysema assessment using ResNet-152 embeddings. The pipeline integrates automated lung segmentation, quality-control filtering, and extraction of 2048-dimensional embeddings from mid-lung patches, followed by analysis using logistic regression, LASSO, and recursive feature elimination (RFE). The embeddings are further fused with quantitative CT (QCT) markers, including %LAA, Perc15, and total lung volume (TLV), to enhance robustness and interpretability. Bootstrapped validation demonstrates strong diagnostic performance (ROC-AUC = 0.996, PR-AUC = 0.962, balanced accuracy = 0.931) with low computational cost. The proposed approach shows that ResNet embeddings pretrained on CT data can be effectively reused without retraining for emphysema characterization, providing a reproducible and explainable framework suitable for research and screening support in population-level LDCT analysis.</p>
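	<p>A minimal sketch of the embedding-plus-classifier stage is shown below, assuming an ImageNet-pretrained torchvision ResNet-152 as a stand-in for the CT-pretrained backbone, random tensors in place of quality-controlled mid-lung patches, and plain logistic regression without the LASSO/RFE selection or QCT fusion steps.</p>
	<pre><code>
import torch
import numpy as np
from torchvision.models import resnet152, ResNet152_Weights
from sklearn.linear_model import LogisticRegression

# Backbone producing 2048-D pooled features (ImageNet weights here; the study uses CT-pretrained weights)
backbone = resnet152(weights=ResNet152_Weights.DEFAULT)
backbone.fc = torch.nn.Identity()          # drop the classification head, keep the embedding
backbone.eval()

def embed(patches):
    """patches: float tensor (N, 3, 224, 224) of normalised mid-lung patches."""
    with torch.no_grad():
        return backbone(patches).numpy()   # (N, 2048)

# Placeholder data standing in for quality-controlled LDCT patches and emphysema labels
patches = torch.randn(16, 3, 224, 224)
labels = np.random.randint(0, 2, size=16)

X = embed(patches)
clf = LogisticRegression(max_iter=1000).fit(X, labels)
print("train accuracy:", clf.score(X, labels))
</code></pre>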
	]]></content:encoded>

	<dc:title>Interpretable Diagnosis of Pulmonary Emphysema on Low-Dose CT Using ResNet Embeddings</dc:title>
			<dc:creator>Talshyn Sarsembayeva</dc:creator>
			<dc:creator>Madina Mansurova</dc:creator>
			<dc:creator>Ainash Oshibayeva</dc:creator>
			<dc:creator>Stepan Serebryakov</dc:creator>
		<dc:identifier>doi: 10.3390/jimaging12010051</dc:identifier>
	<dc:source>Journal of Imaging</dc:source>
	<dc:date>2026-01-21</dc:date>

	<prism:publicationName>Journal of Imaging</prism:publicationName>
	<prism:publicationDate>2026-01-21</prism:publicationDate>
	<prism:volume>12</prism:volume>
	<prism:number>1</prism:number>
	<prism:section>Article</prism:section>
	<prism:startingPage>51</prism:startingPage>
		<prism:doi>10.3390/jimaging12010051</prism:doi>
	<prism:url>https://www.mdpi.com/2313-433X/12/1/51</prism:url>
	
	<cc:license rdf:resource="CC BY 4.0"/>
</item>
        <item rdf:about="https://www.mdpi.com/2313-433X/12/1/50">

	<title>J. Imaging, Vol. 12, Pages 50: ADAM-Net: Anatomy-Guided Attentive Unsupervised Domain Adaptation for Joint MG Segmentation and MGD Grading</title>
	<link>https://www.mdpi.com/2313-433X/12/1/50</link>
	<description>Meibomian gland dysfunction (MGD) is a leading cause of dry eye disease, assessable through the degree of gland atrophy. While deep learning (DL) has advanced meibomian gland (MG) segmentation and MGD classification, existing methods treat these tasks independently and suffer from domain shift across multi-center imaging devices. We propose ADAM-Net, an attention-guided unsupervised domain adaptation multi-task framework that jointly models MG segmentation and MGD classification. Our model introduces structure-aware multi-task learning and anatomy-guided attention to enhance feature sharing, suppress background noise, and improve glandular region perception. For the cross-domain tasks MGD-1K→{K5M, CR-2, LV II}, this study systematically evaluates the overall performance of ADAM-Net from multiple perspectives. The experimental results show that ADAM-Net achieves classification accuracies of 77.93%, 74.86%, and 81.77% on the target domains, significantly outperforming current mainstream unsupervised domain adaptation (UDA) methods. The F1-score and the Matthews correlation coefficient (MCC-score) indicate that the model maintains robust discriminative capability even under class-imbalanced scenarios. t-SNE visualizations further validate its cross-domain feature alignment capability. These results demonstrate that ADAM-Net exhibits strong robustness and interpretability in multi-center scenarios, providing an effective solution for automated MGD assessment.</description>
	<pubDate>2026-01-21</pubDate>

	<content:encoded><![CDATA[
	<p><b>J. Imaging, Vol. 12, Pages 50: ADAM-Net: Anatomy-Guided Attentive Unsupervised Domain Adaptation for Joint MG Segmentation and MGD Grading</b></p>
	<p>Journal of Imaging <a href="https://www.mdpi.com/2313-433X/12/1/50">doi: 10.3390/jimaging12010050</a></p>
	<p>Authors:
		Junbin Fang
		Xuan He
		You Jiang
		Mini Han Wang
		</p>
	<p>Meibomian gland dysfunction (MGD) is a leading cause of dry eye disease, assessable through the degree of gland atrophy. While deep learning (DL) has advanced meibomian gland (MG) segmentation and MGD classification, existing methods treat these tasks independently and suffer from domain shift across multi-center imaging devices. We propose ADAM-Net, an attention-guided unsupervised domain adaptation multi-task framework that jointly models MG segmentation and MGD classification. Our model introduces structure-aware multi-task learning and anatomy-guided attention to enhance feature sharing, suppress background noise, and improve glandular region perception. For the cross-domain tasks MGD-1K→{K5M, CR-2, LV II}, this study systematically evaluates the overall performance of ADAM-Net from multiple perspectives. The experimental results show that ADAM-Net achieves classification accuracies of 77.93%, 74.86%, and 81.77% on the target domains, significantly outperforming current mainstream unsupervised domain adaptation (UDA) methods. The F1-score and the Matthews correlation coefficient (MCC-score) indicate that the model maintains robust discriminative capability even under class-imbalanced scenarios. t-SNE visualizations further validate its cross-domain feature alignment capability. These results demonstrate that ADAM-Net exhibits strong robustness and interpretability in multi-center scenarios, providing an effective solution for automated MGD assessment.</p>
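	<p>For reference, the class-imbalance-aware metrics mentioned above (macro F1 and the Matthews correlation coefficient) can be computed directly with scikit-learn; the sketch below uses synthetic labels on an assumed four-grade MGD scale, not the study's data.</p>
	<pre><code>
import numpy as np
from sklearn.metrics import f1_score, matthews_corrcoef

# Toy predictions for an assumed four-grade MGD classification task with class imbalance
rng = np.random.default_rng(0)
y_true = rng.choice([0, 1, 2, 3], size=500, p=[0.55, 0.25, 0.15, 0.05])
y_pred = np.where(rng.random(500) < 0.8, y_true, rng.integers(0, 4, size=500))

print("macro F1:", f1_score(y_true, y_pred, average="macro"))
print("MCC     :", matthews_corrcoef(y_true, y_pred))
</code></pre>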
	]]></content:encoded>

	<dc:title>ADAM-Net: Anatomy-Guided Attentive Unsupervised Domain Adaptation for Joint MG Segmentation and MGD Grading</dc:title>
			<dc:creator>Junbin Fang</dc:creator>
			<dc:creator>Xuan He</dc:creator>
			<dc:creator>You Jiang</dc:creator>
			<dc:creator>Mini Han Wang</dc:creator>
		<dc:identifier>doi: 10.3390/jimaging12010050</dc:identifier>
	<dc:source>Journal of Imaging</dc:source>
	<dc:date>2026-01-21</dc:date>

	<prism:publicationName>Journal of Imaging</prism:publicationName>
	<prism:publicationDate>2026-01-21</prism:publicationDate>
	<prism:volume>12</prism:volume>
	<prism:number>1</prism:number>
	<prism:section>Article</prism:section>
	<prism:startingPage>50</prism:startingPage>
		<prism:doi>10.3390/jimaging12010050</prism:doi>
	<prism:url>https://www.mdpi.com/2313-433X/12/1/50</prism:url>
	
	<cc:license rdf:resource="CC BY 4.0"/>
</item>
        <item rdf:about="https://www.mdpi.com/2313-433X/12/1/49">

	<title>J. Imaging, Vol. 12, Pages 49: Chest Radiography Optimization: Identifying the Optimal kV for Image Quality in a Phantom Study</title>
	<link>https://www.mdpi.com/2313-433X/12/1/49</link>
	<description>Chest radiography remains one of the most frequently performed imaging examinations, highlighting the need for optimization of acquisition parameters to balance image quality and radiation dose. This study presents a phantom-based quantitative evaluation of chest radiography acquisition settings using a digital radiography system (AGFA DR 600). Measurements were performed at three tube voltage levels across simulated patient-equivalent thicknesses generated using PMMA slabs, with a Leeds TOR 15FG image quality phantom positioned centrally in the imaging setup. Image quality was quantitatively assessed using signal-to-noise ratio (SNR) and contrast-to-noise ratio (CNR), which were calculated from mean pixel values obtained from repeated acquisitions. Radiation exposure was evaluated through estimation of entrance surface dose (ESD). The analysis demonstrated that dose-normalized performance metrics favored intermediate tube voltages for slim and average patient-equivalent thicknesses, while higher voltages were required to maintain image quality in obese-equivalent conditions. Overall, image quality and dose were found to be strongly dependent on the combined selection of tube voltage and phantom thickness. These findings indicate that modest adjustments to tube voltage selection may improve the balance between image quality and radiation dose in chest radiography. Nevertheless, as the present work is based on phantom measurements, further validation using clinical images and observer-based studies is required before any modification of routine radiographic practice.</description>
	<pubDate>2026-01-21</pubDate>

	<content:encoded><![CDATA[
	<p><b>J. Imaging, Vol. 12, Pages 49: Chest Radiography Optimization: Identifying the Optimal kV for Image Quality in a Phantom Study</b></p>
	<p>Journal of Imaging <a href="https://www.mdpi.com/2313-433X/12/1/49">doi: 10.3390/jimaging12010049</a></p>
	<p>Authors:
		Ioannis Antonakos
		Kyriakos Kokkinogoulis
		Maria Giannopoulou
		Efstathios P. Efstathopoulos
		</p>
	<p>Chest radiography remains one of the most frequently performed imaging examinations, highlighting the need for optimization of acquisition parameters to balance image quality and radiation dose. This study presents a phantom-based quantitative evaluation of chest radiography acquisition settings using a digital radiography system (AGFA DR 600). Measurements were performed at three tube voltage levels across simulated patient-equivalent thicknesses generated using PMMA slabs, with a Leeds TOR 15FG image quality phantom positioned centrally in the imaging setup. Image quality was quantitatively assessed using signal-to-noise ratio (SNR) and contrast-to-noise ratio (CNR), which were calculated from mean pixel values obtained from repeated acquisitions. Radiation exposure was evaluated through estimation of entrance surface dose (ESD). The analysis demonstrated that dose-normalized performance metrics favored intermediate tube voltages for slim and average patient-equivalent thicknesses, while higher voltages were required to maintain image quality in obese-equivalent conditions. Overall, image quality and dose were found to be strongly dependent on the combined selection of tube voltage and phantom thickness. These findings indicate that modest adjustments to tube voltage selection may improve the balance between image quality and radiation dose in chest radiography. Nevertheless, as the present work is based on phantom measurements, further validation using clinical images and observer-based studies is required before any modification of routine radiographic practice.</p>
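	<p>The sketch below shows common definitions of the quantities used in such phantom optimisation studies: SNR and CNR from ROI pixel statistics, and a dose-normalised figure of merit taken here as CNR squared per unit entrance surface dose. The ROI protocol and the exact figure of merit used in this paper are not reproduced; the values are toy numbers.</p>
	<pre><code>
import numpy as np

def snr(roi):
    """Signal-to-noise ratio of a uniform region of interest (mean / standard deviation)."""
    roi = np.asarray(roi, dtype=float)
    return roi.mean() / roi.std()

def cnr(detail_roi, background_roi):
    """Contrast-to-noise ratio between a detail and the adjacent background."""
    detail_roi = np.asarray(detail_roi, dtype=float)
    background_roi = np.asarray(background_roi, dtype=float)
    return abs(detail_roi.mean() - background_roi.mean()) / background_roi.std()

def dose_normalised_fom(cnr_value, esd_mgy):
    """One common figure of merit for optimisation studies: CNR squared per unit entrance surface dose."""
    return cnr_value ** 2 / esd_mgy

# Toy example with synthetic pixel values
rng = np.random.default_rng(0)
bg = rng.normal(1000, 25, size=(50, 50))
detail = rng.normal(1100, 25, size=(20, 20))
c = cnr(detail, bg)
print(f"CNR = {c:.2f}, FOM at 0.15 mGy = {dose_normalised_fom(c, 0.15):.1f}")
</code></pre>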
	]]></content:encoded>

	<dc:title>Chest Radiography Optimization: Identifying the Optimal kV for Image Quality in a Phantom Study</dc:title>
			<dc:creator>Ioannis Antonakos</dc:creator>
			<dc:creator>Kyriakos Kokkinogoulis</dc:creator>
			<dc:creator>Maria Giannopoulou</dc:creator>
			<dc:creator>Efstathios P. Efstathopoulos</dc:creator>
		<dc:identifier>doi: 10.3390/jimaging12010049</dc:identifier>
	<dc:source>Journal of Imaging</dc:source>
	<dc:date>2026-01-21</dc:date>

	<prism:publicationName>Journal of Imaging</prism:publicationName>
	<prism:publicationDate>2026-01-21</prism:publicationDate>
	<prism:volume>12</prism:volume>
	<prism:number>1</prism:number>
	<prism:section>Article</prism:section>
	<prism:startingPage>49</prism:startingPage>
		<prism:doi>10.3390/jimaging12010049</prism:doi>
	<prism:url>https://www.mdpi.com/2313-433X/12/1/49</prism:url>
	
	<cc:license rdf:resource="CC BY 4.0"/>
</item>
        <item rdf:about="https://www.mdpi.com/2313-433X/12/1/48">

	<title>J. Imaging, Vol. 12, Pages 48: Graph-Enhanced Expectation Maximization for Emission Tomography</title>
	<link>https://www.mdpi.com/2313-433X/12/1/48</link>
	<description>Emission tomography, including single-photon emission computed tomography (SPECT), requires image reconstruction from noisy and incomplete projection data. The maximum-likelihood expectation maximization (MLEM) algorithm is widely used due to its statistical foundation and non-negativity preservation, but it is highly sensitive to noise, particularly in low-count conditions. Although total variation (TV) regularization can reduce noise, it often oversmooths structural details and requires careful parameter tuning. We propose a Graph-Enhanced Expectation Maximization (GREM) algorithm that incorporates graph-based neighborhood information into an MLEM-type multiplicative reconstruction scheme. The method is motivated by a penalized formulation combining a Kullback–Leibler divergence term with a graph Laplacian regularization term, promoting local structural consistency while preserving edges. The resulting update retains the multiplicative structure of MLEM and preserves the non-negativity of the image estimates. Numerical experiments using synthetic phantoms under multiple noise levels, as well as clinical 99mTc-GSA liver SPECT data, demonstrate that GREM consistently outperforms conventional MLEM and TV-regularized MLEM in terms of PSNR and MS-SSIM. These results indicate that GREM provides an effective and practical approach for edge-preserving noise suppression in emission tomography without relying on external training data.</description>
	<pubDate>2026-01-20</pubDate>

	<content:encoded><![CDATA[
	<p><b>J. Imaging, Vol. 12, Pages 48: Graph-Enhanced Expectation Maximization for Emission Tomography</b></p>
	<p>Journal of Imaging <a href="https://www.mdpi.com/2313-433X/12/1/48">doi: 10.3390/jimaging12010048</a></p>
	<p>Authors:
		Ryosuke Kasai
		Hideki Otsuka
		</p>
	<p>Emission tomography, including single-photon emission computed tomography (SPECT), requires image reconstruction from noisy and incomplete projection data. The maximum-likelihood expectation maximization (MLEM) algorithm is widely used due to its statistical foundation and non-negativity preservation, but it is highly sensitive to noise, particularly in low-count conditions. Although total variation (TV) regularization can reduce noise, it often oversmooths structural details and requires careful parameter tuning. We propose a Graph-Enhanced Expectation Maximization (GREM) algorithm that incorporates graph-based neighborhood information into an MLEM-type multiplicative reconstruction scheme. The method is motivated by a penalized formulation combining a Kullback–Leibler divergence term with a graph Laplacian regularization term, promoting local structural consistency while preserving edges. The resulting update retains the multiplicative structure of MLEM and preserves the non-negativity of the image estimates. Numerical experiments using synthetic phantoms under multiple noise levels, as well as clinical 99mTc-GSA liver SPECT data, demonstrate that GREM consistently outperforms conventional MLEM and TV-regularized MLEM in terms of PSNR and MS-SSIM. These results indicate that GREM provides an effective and practical approach for edge-preserving noise suppression in emission tomography without relying on external training data.</p>
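	<p>For orientation, the classical MLEM multiplicative update that GREM builds on is sketched below on a toy problem; the graph-Laplacian-penalised variant proposed in the paper modifies this update and is not reproduced here. The system matrix and counts are random placeholders.</p>
	<pre><code>
import numpy as np

def mlem(A, y, n_iter=50, eps=1e-12):
    """Classical MLEM update: x is multiplied by A^T(y / (A x)) and divided by the sensitivity A^T 1."""
    x = np.ones(A.shape[1])
    sens = A.T @ np.ones(A.shape[0]) + eps        # sensitivity image A^T 1
    for _ in range(n_iter):
        ratio = y / (A @ x + eps)                 # measured / predicted counts
        x = x / sens * (A.T @ ratio)              # multiplicative update keeps x non-negative
    return x

# Toy emission problem: random non-negative system matrix, Poisson counts
rng = np.random.default_rng(0)
A = rng.uniform(0.0, 1.0, size=(128, 64))
x_true = rng.uniform(0.0, 4.0, size=64)
y = rng.poisson(A @ x_true).astype(float)
x_hat = mlem(A, y)
print("relative error:", np.linalg.norm(x_hat - x_true) / np.linalg.norm(x_true))
</code></pre>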
	]]></content:encoded>

	<dc:title>Graph-Enhanced Expectation Maximization for Emission Tomography</dc:title>
			<dc:creator>Ryosuke Kasai</dc:creator>
			<dc:creator>Hideki Otsuka</dc:creator>
		<dc:identifier>doi: 10.3390/jimaging12010048</dc:identifier>
	<dc:source>Journal of Imaging</dc:source>
	<dc:date>2026-01-20</dc:date>

	<prism:publicationName>Journal of Imaging</prism:publicationName>
	<prism:publicationDate>2026-01-20</prism:publicationDate>
	<prism:volume>12</prism:volume>
	<prism:number>1</prism:number>
	<prism:section>Article</prism:section>
	<prism:startingPage>48</prism:startingPage>
		<prism:doi>10.3390/jimaging12010048</prism:doi>
	<prism:url>https://www.mdpi.com/2313-433X/12/1/48</prism:url>
	
	<cc:license rdf:resource="CC BY 4.0"/>
</item>
        <item rdf:about="https://www.mdpi.com/2313-433X/12/1/47">

	<title>J. Imaging, Vol. 12, Pages 47: Automatic Retinal Nerve Fiber Segmentation and the Influence of Intersubject Variability in Ocular Parameters on the Mapping of Retinal Sites to the Pointwise Orientation Angles</title>
	<link>https://www.mdpi.com/2313-433X/12/1/47</link>
	<description>The current study investigates the influence of intersubject variability in ocular characteristics on the mapping of visual field (VF) sites to the pointwise directional angles in retinal nerve fiber layer (RNFL) bundle traces. In addition, the accuracy of the mapping of VF sites to the optic nerve head (ONH) was compared to ground truth baselines. Fundus photographs of 546 eyes of 546 healthy subjects (with no history of ocular disease or diabetic retinopathy) were enhanced digitally and RNFL bundle traces were segmented based on the Personalized Estimated Segmentation (PES) algorithm’s core technique. A 24-2 VF grid pattern was overlaid onto the photographs in order to relate VF test points to intersecting RNFL bundles. The PES algorithm effectively traced RNFL bundles in fundus images, achieving an average accuracy of 97.6% relative to the Jansonius map through the application of 10th-order Bezier curves. The PES algorithm assembled an average of 4726 RNFL bundles per fundus image based on 4975 sampling points, obtaining a total of 2,580,505 RNFL bundles based on 2,716,321 sampling points. The influence of ocular parameters could be evaluated for 34 out of 52 VF locations. The ONH-fovea angle and the ONH position in relation to the fovea were the most prominent predictors for variations in the mapping of retinal locations to the pointwise directional angle (p &lt; 0.001). The variation explained by the model (R² value) ranges from 27.6% for visual field location 15 to 77.8% in location 22, with a mean of 56%. Significant individual variability was found in the mapping of VF sites to the ONH, with a mean standard deviation (95% limit) of 16.55° (median 17.68°) for 50 out of 52 VF locations, ranging from less than 1° to 44.05°. The mean entry angles differed from previous baselines by a range of less than 1° to 23.9° (average difference of 10.6° ± 5.53°), with an RMSE of 11.94.</description>
	<pubDate>2026-01-19</pubDate>

	<content:encoded><![CDATA[
	<p><b>J. Imaging, Vol. 12, Pages 47: Automatic Retinal Nerve Fiber Segmentation and the Influence of Intersubject Variability in Ocular Parameters on the Mapping of Retinal Sites to the Pointwise Orientation Angles</b></p>
	<p>Journal of Imaging <a href="https://www.mdpi.com/2313-433X/12/1/47">doi: 10.3390/jimaging12010047</a></p>
	<p>Authors:
		Diego Luján Villarreal
		Adriana Leticia Vera-Tizatl
		</p>
	<p>The current study investigates the influence of intersubject variability in ocular characteristics on the mapping of visual field (VF) sites to the pointwise directional angles in retinal nerve fiber layer (RNFL) bundle traces. In addition, the accuracy of the mapping of VF sites to the optic nerve head (ONH) was compared to ground truth baselines. Fundus photographs of 546 eyes of 546 healthy subjects (with no history of ocular disease or diabetic retinopathy) were enhanced digitally and RNFL bundle traces were segmented based on the Personalized Estimated Segmentation (PES) algorithm’s core technique. A 24-2 VF grid pattern was overlaid onto the photographs in order to relate VF test points to intersecting RNFL bundles. The PES algorithm effectively traced RNFL bundles in fundus images, achieving an average accuracy of 97.6% relative to the Jansonius map through the application of 10th-order Bezier curves. The PES algorithm assembled an average of 4726 RNFL bundles per fundus image based on 4975 sampling points, obtaining a total of 2,580,505 RNFL bundles based on 2,716,321 sampling points. The influence of ocular parameters could be evaluated for 34 out of 52 VF locations. The ONH-fovea angle and the ONH position in relation to the fovea were the most prominent predictors for variations in the mapping of retinal locations to the pointwise directional angle (p &lt; 0.001). The variation explained by the model (R² value) ranges from 27.6% for visual field location 15 to 77.8% in location 22, with a mean of 56%. Significant individual variability was found in the mapping of VF sites to the ONH, with a mean standard deviation (95% limit) of 16.55° (median 17.68°) for 50 out of 52 VF locations, ranging from less than 1° to 44.05°. The mean entry angles differed from previous baselines by a range of less than 1° to 23.9° (average difference of 10.6° ± 5.53°), with an RMSE of 11.94.</p>
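	<p>The sketch below evaluates a degree-10 Bezier curve from its control points via the Bernstein basis, the kind of curve the PES algorithm fits to RNFL bundle traces; the control points here are arbitrary placeholders rather than fitted values.</p>
	<pre><code>
import numpy as np
from scipy.special import comb

def bezier_curve(control_points, n_samples=200):
    """Evaluate a Bezier curve of arbitrary degree from its (d + 1, 2) control points."""
    P = np.asarray(control_points, dtype=float)
    d = len(P) - 1                                           # d = 10 for the 10th-order curves above
    t = np.linspace(0.0, 1.0, n_samples)[:, None]            # (n_samples, 1)
    i = np.arange(d + 1)[None, :]                            # (1, d + 1)
    basis = comb(d, i) * t ** i * (1.0 - t) ** (d - i)       # Bernstein polynomials
    return basis @ P                                         # (n_samples, 2) curve points

# Placeholder control points for one degree-10 fibre-bundle trace
rng = np.random.default_rng(0)
ctrl = np.cumsum(rng.uniform(-5, 15, size=(11, 2)), axis=0)
trace = bezier_curve(ctrl)
print(trace.shape)  # (200, 2)
</code></pre>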
	]]></content:encoded>

	<dc:title>Automatic Retinal Nerve Fiber Segmentation and the Influence of Intersubject Variability in Ocular Parameters on the Mapping of Retinal Sites to the Pointwise Orientation Angles</dc:title>
			<dc:creator>Diego Luján Villarreal</dc:creator>
			<dc:creator>Adriana Leticia Vera-Tizatl</dc:creator>
		<dc:identifier>doi: 10.3390/jimaging12010047</dc:identifier>
	<dc:source>Journal of Imaging</dc:source>
	<dc:date>2026-01-19</dc:date>

	<prism:publicationName>Journal of Imaging</prism:publicationName>
	<prism:publicationDate>2026-01-19</prism:publicationDate>
	<prism:volume>12</prism:volume>
	<prism:number>1</prism:number>
	<prism:section>Article</prism:section>
	<prism:startingPage>47</prism:startingPage>
		<prism:doi>10.3390/jimaging12010047</prism:doi>
	<prism:url>https://www.mdpi.com/2313-433X/12/1/47</prism:url>
	
	<cc:license rdf:resource="CC BY 4.0"/>
</item>
        <item rdf:about="https://www.mdpi.com/2313-433X/12/1/46">

	<title>J. Imaging, Vol. 12, Pages 46: A Dual Stream Deep Learning Framework for Alzheimer&amp;rsquo;s Disease Detection Using MRI Sonification</title>
	<link>https://www.mdpi.com/2313-433X/12/1/46</link>
	<description>Alzheimer’s Disease (AD) is a progressive brain illness that affects millions of individuals across the world. It causes gradual damage to brain cells, leading to memory loss and cognitive dysfunction. Although Magnetic Resonance Imaging (MRI) is widely used in AD diagnosis, existing studies rely solely on visual representations, leaving alternative features unexplored. The objective of this study is to explore whether MRI sonification can provide complementary diagnostic information when combined with conventional image-based methods. In this study, we propose a novel dual-stream multimodal framework that integrates 2D MRI slices with their corresponding audio representations. MRI images are transformed into audio signals using multi-scale, multi-orientation Gabor filtering, followed by a Hilbert space-filling curve to preserve spatial locality. The image and sound modalities are processed using a lightweight CNN and YAMNet, respectively, then fused via logistic regression. The multimodal framework achieved its highest accuracy in distinguishing AD from Cognitively Normal (CN) subjects at 98.2%, with 94% for AD vs. Mild Cognitive Impairment (MCI) and 93.2% for MCI vs. CN. This work provides a new perspective and highlights the potential of audio transformation of imaging data for feature extraction and classification.</description>
	<pubDate>2026-01-15</pubDate>

	<content:encoded><![CDATA[
	<p><b>J. Imaging, Vol. 12, Pages 46: A Dual Stream Deep Learning Framework for Alzheimer&amp;rsquo;s Disease Detection Using MRI Sonification</b></p>
	<p>Journal of Imaging <a href="https://www.mdpi.com/2313-433X/12/1/46">doi: 10.3390/jimaging12010046</a></p>
	<p>Authors:
		Nadia A. Mohsin
		Mohammed H. Abdul Ameer
		</p>
	<p>Alzheimer’s Disease (AD) is a progressive brain illness that affects millions of individuals across the world. It causes gradual damage to brain cells, leading to memory loss and cognitive dysfunction. Although Magnetic Resonance Imaging (MRI) is widely used in AD diagnosis, existing studies rely solely on visual representations, leaving alternative features unexplored. The objective of this study is to explore whether MRI sonification can provide complementary diagnostic information when combined with conventional image-based methods. In this study, we propose a novel dual-stream multimodal framework that integrates 2D MRI slices with their corresponding audio representations. MRI images are transformed into audio signals using multi-scale, multi-orientation Gabor filtering, followed by a Hilbert space-filling curve to preserve spatial locality. The image and sound modalities are processed using a lightweight CNN and YAMNet, respectively, then fused via logistic regression. The multimodal framework achieved its highest accuracy in distinguishing AD from Cognitively Normal (CN) subjects at 98.2%, with 94% for AD vs. Mild Cognitive Impairment (MCI) and 93.2% for MCI vs. CN. This work provides a new perspective and highlights the potential of audio transformation of imaging data for feature extraction and classification.</p>
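	<p>The locality-preserving flattening step can be illustrated with the standard Hilbert-curve index-to-coordinate mapping, as in the Python sketch below; the Gabor filtering, audio rendering, and YAMNet stages are omitted, and the input slice is a random placeholder.</p>
	<pre><code>
import numpy as np

def hilbert_d2xy(grid_n, d):
    """Map a 1-D Hilbert index d to (x, y) on a grid_n x grid_n grid (grid_n a power of two)."""
    x = y = 0
    t = d
    s = 1
    while s < grid_n:
        rx = 1 & (t // 2)
        ry = 1 & (t ^ rx)
        if ry == 0:                          # rotate or flip the quadrant when needed
            if rx == 1:
                x, y = s - 1 - x, s - 1 - y
            x, y = y, x
        x += s * rx
        y += s * ry
        t //= 4
        s *= 2
    return x, y

def hilbert_scan(image):
    """Flatten a square 2-D slice into a 1-D signal along a Hilbert curve (locality preserving)."""
    n = image.shape[0]
    coords = [hilbert_d2xy(n, d) for d in range(n * n)]
    return np.array([image[y, x] for x, y in coords], dtype=float)

slice_2d = np.random.default_rng(0).random((64, 64))   # placeholder for a filtered MRI slice
signal = hilbert_scan(slice_2d)
print(signal.shape)  # (4096,)
</code></pre>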
	]]></content:encoded>

	<dc:title>A Dual Stream Deep Learning Framework for Alzheimer&amp;rsquo;s Disease Detection Using MRI Sonification</dc:title>
			<dc:creator>Nadia A. Mohsin</dc:creator>
			<dc:creator>Mohammed H. Abdul Ameer</dc:creator>
		<dc:identifier>doi: 10.3390/jimaging12010046</dc:identifier>
	<dc:source>Journal of Imaging</dc:source>
	<dc:date>2026-01-15</dc:date>

	<prism:publicationName>Journal of Imaging</prism:publicationName>
	<prism:publicationDate>2026-01-15</prism:publicationDate>
	<prism:volume>12</prism:volume>
	<prism:number>1</prism:number>
	<prism:section>Article</prism:section>
	<prism:startingPage>46</prism:startingPage>
		<prism:doi>10.3390/jimaging12010046</prism:doi>
	<prism:url>https://www.mdpi.com/2313-433X/12/1/46</prism:url>
	
	<cc:license rdf:resource="CC BY 4.0"/>
</item>
        <item rdf:about="https://www.mdpi.com/2313-433X/12/1/45">

	<title>J. Imaging, Vol. 12, Pages 45: A Cross-Device and Cross-OS Benchmark of Modern Web Animation Systems</title>
	<link>https://www.mdpi.com/2313-433X/12/1/45</link>
	<description>Although modern web technologies increasingly rely on high-performance rendering methods to support rich visual content across a range of devices and operating systems, the field remains significantly under-researched. The performance of animated visual elements is affected by numerous factors, including browsers, operating systems, GPU acceleration, scripting load, and device limitations. This study systematically evaluates animation performance across multiple platforms using a unified set of circle-based animations implemented with eight web-compatible technologies, including HTML, CSS, SVG, JavaScript, Canvas, and WebGL. Animations were evaluated under controlled feature combinations involving random motion, distance, colour variation, blending, and transformations, with object counts ranging from 10 to 10,000. Measurements were conducted on desktop operating systems (Windows, macOS, Linux) and mobile platforms (iOS, Android), using CPU utilisation, GPU memory usage, and frame rate (FPS) as key metrics. Results show that DOM-based approaches maintain stable performance at 100 animated objects but exhibit notable degradation by 500 objects. Canvas-based rendering extends usability to higher object counts, while WebGL demonstrates the most stable performance at large scales (5000–10,000 objects). These findings provide concrete guidance for selecting appropriate animation technologies based on scene complexity and target platform.</description>
	<pubDate>2026-01-15</pubDate>

	<content:encoded><![CDATA[
	<p><b>J. Imaging, Vol. 12, Pages 45: A Cross-Device and Cross-OS Benchmark of Modern Web Animation Systems</b></p>
	<p>Journal of Imaging <a href="https://www.mdpi.com/2313-433X/12/1/45">doi: 10.3390/jimaging12010045</a></p>
	<p>Authors:
		Tajana Koren Ivančević
		Trpimir Jeronim Ježić
		Nikolina Stanić Loknar
		</p>
	<p>Although modern web technologies increasingly rely on high-performance rendering methods to support rich visual content across a range of devices and operating systems, the field remains significantly under-researched. The performance of animated visual elements is affected by numerous factors, including browsers, operating systems, GPU acceleration, scripting load, and device limitations. This study systematically evaluates animation performance across multiple platforms using a unified set of circle-based animations implemented with eight web-compatible technologies, including HTML, CSS, SVG, JavaScript, Canvas, and WebGL. Animations were evaluated under controlled feature combinations involving random motion, distance, colour variation, blending, and transformations, with object counts ranging from 10 to 10,000. Measurements were conducted on desktop operating systems (Windows, macOS, Linux) and mobile platforms (iOS, Android), using CPU utilisation, GPU memory usage, and frame rate (FPS) as key metrics. Results show that DOM-based approaches maintain stable performance at 100 animated objects but exhibit notable degradation by 500 objects. Canvas-based rendering extends usability to higher object counts, while WebGL demonstrates the most stable performance at large scales (5000–10,000 objects). These findings provide concrete guidance for selecting appropriate animation technologies based on scene complexity and target platform.</p>
	]]></content:encoded>

	<dc:title>A Cross-Device and Cross-OS Benchmark of Modern Web Animation Systems</dc:title>
			<dc:creator>Tajana Koren Ivančević</dc:creator>
			<dc:creator>Trpimir Jeronim Ježić</dc:creator>
			<dc:creator>Nikolina Stanić Loknar</dc:creator>
		<dc:identifier>doi: 10.3390/jimaging12010045</dc:identifier>
	<dc:source>Journal of Imaging</dc:source>
	<dc:date>2026-01-15</dc:date>

	<prism:publicationName>Journal of Imaging</prism:publicationName>
	<prism:publicationDate>2026-01-15</prism:publicationDate>
	<prism:volume>12</prism:volume>
	<prism:number>1</prism:number>
	<prism:section>Article</prism:section>
	<prism:startingPage>45</prism:startingPage>
		<prism:doi>10.3390/jimaging12010045</prism:doi>
	<prism:url>https://www.mdpi.com/2313-433X/12/1/45</prism:url>
	
	<cc:license rdf:resource="CC BY 4.0"/>
</item>
        <item rdf:about="https://www.mdpi.com/2313-433X/12/1/44">

	<title>J. Imaging, Vol. 12, Pages 44: A Deep Feature Fusion Underwater Image Enhancement Model Based on Perceptual Vision Swin Transformer</title>
	<link>https://www.mdpi.com/2313-433X/12/1/44</link>
	<description>Underwater optical images are the primary carriers of underwater scene information, playing a crucial role in marine resource exploration, underwater environmental monitoring, and engineering inspection. However, wavelength-dependent absorption and scattering severely deteriorate underwater images, leading to reduced contrast, chromatic distortions, and loss of structural details. To address these issues, we propose a U-shaped underwater image enhancement framework that integrates Swin-Transformer blocks with lightweight attention and residual modules. A Dual-Window Multi-Head Self-Attention (DWMSA) in the bottleneck models long-range context while preserving fine local structure. A Global-Aware Attention Map (GAMP) adaptively re-weights channels and spatial locations to focus on severely degraded regions. A Feature-Augmentation Residual Network (FARN) stabilizes deep training and emphasizes texture and color fidelity. Trained with a combination of Charbonnier, perceptual, and edge losses, our method achieves state-of-the-art results in PSNR and SSIM, the lowest LPIPS, and improvements in UIQM and UCIQE on the UFO-120 and EUVP datasets, with average metrics of PSNR 29.5 dB, SSIM 0.94, LPIPS 0.17, UIQM 3.62, and UCIQE 0.59. Qualitative results show reduced color cast, restored contrast, and sharper details. Code, weights, and evaluation scripts will be released to support reproducibility.</description>
	<pubDate>2026-01-14</pubDate>

	<content:encoded><![CDATA[
	<p><b>J. Imaging, Vol. 12, Pages 44: A Deep Feature Fusion Underwater Image Enhancement Model Based on Perceptual Vision Swin Transformer</b></p>
	<p>Journal of Imaging <a href="https://www.mdpi.com/2313-433X/12/1/44">doi: 10.3390/jimaging12010044</a></p>
	<p>Authors:
		Shasha Tian
		Adisorn Sirikham
		Jessada Konpang
		Chuyang Wang
		</p>
	<p>Underwater optical images are the primary carriers of underwater scene information, playing a crucial role in marine resource exploration, underwater environmental monitoring, and engineering inspection. However, wavelength-dependent absorption and scattering severely deteriorate underwater images, leading to reduced contrast, chromatic distortions, and loss of structural details. To address these issues, we propose a U-shaped underwater image enhancement framework that integrates Swin-Transformer blocks with lightweight attention and residual modules. A Dual-Window Multi-Head Self-Attention (DWMSA) in the bottleneck models long-range context while preserving fine local structure. A Global-Aware Attention Map (GAMP) adaptively re-weights channels and spatial locations to focus on severely degraded regions. A Feature-Augmentation Residual Network (FARN) stabilizes deep training and emphasizes texture and color fidelity. Trained with a combination of Charbonnier, perceptual, and edge losses, our method achieves state-of-the-art results in PSNR and SSIM, the lowest LPIPS, and improvements in UIQM and UCIQE on the UFO-120 and EUVP datasets, with average metrics of PSNR 29.5 dB, SSIM 0.94, LPIPS 0.17, UIQM 3.62, and UCIQE 0.59. Qualitative results show reduced color cast, restored contrast, and sharper details. Code, weights, and evaluation scripts will be released to support reproducibility.</p>
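	<p>Of the three loss terms listed above, the Charbonnier term has a standard closed form, sketched below in PyTorch; the epsilon value is a typical choice rather than the paper's, and the perceptual and edge terms are omitted.</p>
	<pre><code>
import torch

def charbonnier_loss(pred, target, eps=1e-3):
    """Charbonnier (smooth L1) loss: sqrt((pred - target)^2 + eps^2), averaged over all elements."""
    return torch.sqrt((pred - target) ** 2 + eps ** 2).mean()

# Toy usage on a fake batch of enhanced / reference underwater images (N, C, H, W)
pred = torch.rand(2, 3, 64, 64, requires_grad=True)
target = torch.rand(2, 3, 64, 64)
loss = charbonnier_loss(pred, target)
loss.backward()
print(float(loss))
</code></pre>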
	]]></content:encoded>

	<dc:title>A Deep Feature Fusion Underwater Image Enhancement Model Based on Perceptual Vision Swin Transformer</dc:title>
			<dc:creator>Shasha Tian</dc:creator>
			<dc:creator>Adisorn Sirikham</dc:creator>
			<dc:creator>Jessada Konpang</dc:creator>
			<dc:creator>Chuyang Wang</dc:creator>
		<dc:identifier>doi: 10.3390/jimaging12010044</dc:identifier>
	<dc:source>Journal of Imaging</dc:source>
	<dc:date>2026-01-14</dc:date>

	<prism:publicationName>Journal of Imaging</prism:publicationName>
	<prism:publicationDate>2026-01-14</prism:publicationDate>
	<prism:volume>12</prism:volume>
	<prism:number>1</prism:number>
	<prism:section>Article</prism:section>
	<prism:startingPage>44</prism:startingPage>
		<prism:doi>10.3390/jimaging12010044</prism:doi>
	<prism:url>https://www.mdpi.com/2313-433X/12/1/44</prism:url>
	
	<cc:license rdf:resource="CC BY 4.0"/>
</item>
        <item rdf:about="https://www.mdpi.com/2313-433X/12/1/43">

	<title>J. Imaging, Vol. 12, Pages 43: FF-Mamba-YOLO: An SSM-Based Benchmark for Forest Fire Detection in UAV Remote Sensing Images</title>
	<link>https://www.mdpi.com/2313-433X/12/1/43</link>
	<description>Timely and accurate detection of forest fires through unmanned aerial vehicle (UAV) remote sensing target detection technology is of paramount importance. However, multiscale targets and complex environmental interference in UAV remote sensing images pose significant challenges during detection tasks. To address these obstacles, this paper presents FF-Mamba-YOLO, a novel framework based on the principles of Mamba and YOLO (You Only Look Once) that leverages innovative modules and architectures to overcome these limitations. First, we introduce MFEBlock and MFFBlock based on state space models (SSMs) in the backbone and neck parts of the network, respectively, enabling the model to effectively capture global dependencies. Second, we construct CFEBlock, a module that performs feature enhancement before SSM processing, improving local feature processing capabilities. Furthermore, we propose MGBlock, which adopts a dynamic gating mechanism, enhancing the model’s adaptive processing capabilities and robustness. Finally, we enhance the structure of the Path Aggregation Feature Pyramid Network (PAFPN) to improve feature fusion quality and introduce DySample to enhance image resolution without significantly increasing computational costs. Experimental results on our self-constructed forest fire image dataset demonstrate that the model achieves 67.4% mAP@50, 36.3% mAP@50:95, and 64.8% precision, outperforming previous state-of-the-art methods. These results highlight the potential of FF-Mamba-YOLO in forest fire monitoring.</description>
	<pubDate>2026-01-13</pubDate>

	<content:encoded><![CDATA[
	<p><b>J. Imaging, Vol. 12, Pages 43: FF-Mamba-YOLO: An SSM-Based Benchmark for Forest Fire Detection in UAV Remote Sensing Images</b></p>
	<p>Journal of Imaging <a href="https://www.mdpi.com/2313-433X/12/1/43">doi: 10.3390/jimaging12010043</a></p>
	<p>Authors:
		Binhua Guo
		Dinghui Liu
		Zhou Shen
		Tiebin Wang
		</p>
	<p>Timely and accurate detection of forest fires through unmanned aerial vehicle (UAV) remote sensing target detection technology is of paramount importance. However, multiscale targets and complex environmental interference in UAV remote sensing images pose significant challenges during detection tasks. To address these obstacles, this paper presents FF-Mamba-YOLO, a novel framework based on the principles of Mamba and YOLO (You Only Look Once) that leverages innovative modules and architectures to overcome these limitations. First, we introduce MFEBlock and MFFBlock based on state space models (SSMs) in the backbone and neck parts of the network, respectively, enabling the model to effectively capture global dependencies. Second, we construct CFEBlock, a module that performs feature enhancement before SSM processing, improving local feature processing capabilities. Furthermore, we propose MGBlock, which adopts a dynamic gating mechanism, enhancing the model’s adaptive processing capabilities and robustness. Finally, we enhance the structure of the Path Aggregation Feature Pyramid Network (PAFPN) to improve feature fusion quality and introduce DySample to enhance image resolution without significantly increasing computational costs. Experimental results on our self-constructed forest fire image dataset demonstrate that the model achieves 67.4% mAP@50, 36.3% mAP@50:95, and 64.8% precision, outperforming previous state-of-the-art methods. These results highlight the potential of FF-Mamba-YOLO in forest fire monitoring.</p>
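	<p>The abstract describes MGBlock only as a dynamic gating mechanism; the block below is a generic sigmoid-gated residual unit in PyTorch, offered purely to illustrate that idea (channel count and layer choices are assumptions, not the paper's design).</p>
	<pre><code>
# Generic dynamic gating over feature channels (illustrative, not the paper's MGBlock).
import torch
import torch.nn as nn

class GatedBlock(nn.Module):
    def __init__(self, channels):
        super().__init__()
        self.transform = nn.Conv2d(channels, channels, kernel_size=3, padding=1)
        self.gate = nn.Sequential(
            nn.AdaptiveAvgPool2d(1),           # global context per channel
            nn.Conv2d(channels, channels, 1),
            nn.Sigmoid(),                      # input-dependent gate in [0, 1]
        )

    def forward(self, x):
        g = self.gate(x)                       # dynamic per-channel weights
        return x + g * self.transform(x)       # gated residual update

x = torch.randn(2, 64, 32, 32)
print(GatedBlock(64)(x).shape)                 # torch.Size([2, 64, 32, 32])
</code></pre>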
	]]></content:encoded>

	<dc:title>FF-Mamba-YOLO: An SSM-Based Benchmark for Forest Fire Detection in UAV Remote Sensing Images</dc:title>
			<dc:creator>Binhua Guo</dc:creator>
			<dc:creator>Dinghui Liu</dc:creator>
			<dc:creator>Zhou Shen</dc:creator>
			<dc:creator>Tiebin Wang</dc:creator>
		<dc:identifier>doi: 10.3390/jimaging12010043</dc:identifier>
	<dc:source>Journal of Imaging</dc:source>
	<dc:date>2026-01-13</dc:date>

	<prism:publicationName>Journal of Imaging</prism:publicationName>
	<prism:publicationDate>2026-01-13</prism:publicationDate>
	<prism:volume>12</prism:volume>
	<prism:number>1</prism:number>
	<prism:section>Article</prism:section>
	<prism:startingPage>43</prism:startingPage>
		<prism:doi>10.3390/jimaging12010043</prism:doi>
	<prism:url>https://www.mdpi.com/2313-433X/12/1/43</prism:url>
	
	<cc:license rdf:resource="CC BY 4.0"/>
</item>
        <item rdf:about="https://www.mdpi.com/2313-433X/12/1/42">

	<title>J. Imaging, Vol. 12, Pages 42: GLCN: Graph-Aware Locality-Enhanced Cross-Modality Re-ID Network</title>
	<link>https://www.mdpi.com/2313-433X/12/1/42</link>
	<description>Cross-modality person re-identification faces challenges such as illumination discrepancies, local occlusions, and inconsistent modality structures, leading to misalignment and sensitivity issues. We propose GLCN, a framework that addresses these problems by enhancing representation learning through locality enhancement, cross-modality structural alignment, and intra-modality compactness. Key components include the Locality-Preserved Cross-branch Fusion (LPCF) module, which combines Local–Positional–Channel Gating (LPCG) for local region and positional sensitivity; Cross-branch Context Interpolated Attention (CCIA) for stable cross-branch consistency; and Graph-Enhanced Center Geometry Alignment (GE-CGA), which aligns class-center similarity structures across modalities to preserve category-level relationships. We also introduce Intra-Modal Prototype Discrepancy Mining Loss (IPDM-Loss) to reduce intra-class variance and improve inter-class separation, thereby creating more compact identity structures in both RGB and IR spaces. Extensive experiments on SYSU-MM01, RegDB, and other benchmarks demonstrate the effectiveness of our approach.</description>
	<pubDate>2026-01-13</pubDate>

	<content:encoded><![CDATA[
	<p><b>J. Imaging, Vol. 12, Pages 42: GLCN: Graph-Aware Locality-Enhanced Cross-Modality Re-ID Network</b></p>
	<p>Journal of Imaging <a href="https://www.mdpi.com/2313-433X/12/1/42">doi: 10.3390/jimaging12010042</a></p>
	<p>Authors:
		Junjie Cao
		Yuhang Yu
		Rong Rong
		Xing Xie
		</p>
	<p>Cross-modality person re-identification faces challenges such as illumination discrepancies, local occlusions, and inconsistent modality structures, leading to misalignment and sensitivity issues. We propose GLCN, a framework that addresses these problems by enhancing representation learning through locality enhancement, cross-modality structural alignment, and intra-modality compactness. Key components include the Locality-Preserved Cross-branch Fusion (LPCF) module, which combines Local–Positional–Channel Gating (LPCG) for local region and positional sensitivity; Cross-branch Context Interpolated Attention (CCIA) for stable cross-branch consistency; and Graph-Enhanced Center Geometry Alignment (GE-CGA), which aligns class-center similarity structures across modalities to preserve category-level relationships. We also introduce Intra-Modal Prototype Discrepancy Mining Loss (IPDM-Loss) to reduce intra-class variance and improve inter-class separation, thereby creating more compact identity structures in both RGB and IR spaces. Extensive experiments on SYSU-MM01, RegDB, and other benchmarks demonstrate the effectiveness of our approach.</p>
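	<p>A generic sketch of what aligning class-center similarity structures across RGB and IR modalities can look like; the center computation, normalised similarity matrices, and MSE objective below are illustrative assumptions, not the paper's GE-CGA definition.</p>
	<pre><code>
# Illustrative class-center similarity alignment between two modalities.
import torch
import torch.nn.functional as F

def class_centers(features, labels, num_classes):
    centers = torch.zeros(num_classes, features.size(1), device=features.device)
    for c in range(num_classes):
        mask = labels == c
        if mask.any():
            centers[c] = features[mask].mean(dim=0)   # per-class prototype
    return F.normalize(centers, dim=1)

def center_geometry_alignment(rgb_feats, ir_feats, labels, num_classes):
    c_rgb = class_centers(rgb_feats, labels, num_classes)
    c_ir = class_centers(ir_feats, labels, num_classes)
    sim_rgb = c_rgb @ c_rgb.t()        # class-center similarity structure, RGB
    sim_ir = c_ir @ c_ir.t()           # class-center similarity structure, IR
    return F.mse_loss(sim_rgb, sim_ir) # pull the two geometries together

rgb, ir = torch.randn(32, 256), torch.randn(32, 256)
labels = torch.randint(0, 8, (32,))
print(center_geometry_alignment(rgb, ir, labels, num_classes=8).item())
</code></pre>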
	]]></content:encoded>

	<dc:title>GLCN: Graph-Aware Locality-Enhanced Cross-Modality Re-ID Network</dc:title>
			<dc:creator>Junjie Cao</dc:creator>
			<dc:creator>Yuhang Yu</dc:creator>
			<dc:creator>Rong Rong</dc:creator>
			<dc:creator>Xing Xie</dc:creator>
		<dc:identifier>doi: 10.3390/jimaging12010042</dc:identifier>
	<dc:source>Journal of Imaging</dc:source>
	<dc:date>2026-01-13</dc:date>

	<prism:publicationName>Journal of Imaging</prism:publicationName>
	<prism:publicationDate>2026-01-13</prism:publicationDate>
	<prism:volume>12</prism:volume>
	<prism:number>1</prism:number>
	<prism:section>Article</prism:section>
	<prism:startingPage>42</prism:startingPage>
		<prism:doi>10.3390/jimaging12010042</prism:doi>
	<prism:url>https://www.mdpi.com/2313-433X/12/1/42</prism:url>
	
	<cc:license rdf:resource="CC BY 4.0"/>
</item>
        <item rdf:about="https://www.mdpi.com/2313-433X/12/1/41">

	<title>J. Imaging, Vol. 12, Pages 41: Calibrated Transformer Fusion for Dual-View Low-Energy CESM Classification</title>
	<link>https://www.mdpi.com/2313-433X/12/1/41</link>
	<description>Contrast-enhanced spectral mammography (CESM) provides low-energy images acquired in standard craniocaudal (CC) and mediolateral oblique (MLO) views, and clinical interpretation relies on integrating both views. This study proposes a dual-view classification framework that combines deep CNN feature extraction with transformer-based fusion for breast-side classification using low-energy (DM) images from CESM acquisitions (Normal vs. Tumorous; benign and malignant merged). The evaluation was conducted using 5-fold stratified group cross-validation with patient-level grouping to prevent leakage across folds. The final configuration (Model E) integrates dual-backbone feature extraction, transformer fusion, MC-dropout inference for uncertainty estimation, and post hoc logistic calibration. Across the five held-out test folds, Model E achieved a mean accuracy of 96.88% ± 2.39% and a mean F1-score of 97.68% ± 1.66%. The mean ROC-AUC and PR-AUC were 0.9915 ± 0.0098 and 0.9968 ± 0.0029, respectively. Probability quality was supported by a mean Brier score of 0.0236 ± 0.0145 and a mean expected calibration error (ECE) of 0.0334 ± 0.0171. An ablation study (Models A–E) was also reported to quantify the incremental contribution of dual-view input, transformer fusion, and uncertainty calibration. Within the limits of this retrospective single-center setting, these results suggest that dual-view transformer fusion can provide strong discrimination while also producing calibrated probabilities and uncertainty outputs that are relevant for decision support.</description>
	<pubDate>2026-01-13</pubDate>

	<content:encoded><![CDATA[
	<p><b>J. Imaging, Vol. 12, Pages 41: Calibrated Transformer Fusion for Dual-View Low-Energy CESM Classification</b></p>
	<p>Journal of Imaging <a href="https://www.mdpi.com/2313-433X/12/1/41">doi: 10.3390/jimaging12010041</a></p>
	<p>Authors:
		Ahmed Alkurdi
		Amira Sallow
		</p>
	<p>Contrast-enhanced spectral mammography (CESM) provides low-energy images acquired in standard craniocaudal (CC) and mediolateral oblique (MLO) views, and clinical interpretation relies on integrating both views. This study proposes a dual-view classification framework that combines deep CNN feature extraction with transformer-based fusion for breast-side classification using low-energy (DM) images from CESM acquisitions (Normal vs. Tumorous; benign and malignant merged). The evaluation was conducted using 5-fold stratified group cross-validation with patient-level grouping to prevent leakage across folds. The final configuration (Model E) integrates dual-backbone feature extraction, transformer fusion, MC-dropout inference for uncertainty estimation, and post hoc logistic calibration. Across the five held-out test folds, Model E achieved a mean accuracy of 96.88% ± 2.39% and a mean F1-score of 97.68% ± 1.66%. The mean ROC-AUC and PR-AUC were 0.9915 ± 0.0098 and 0.9968 ± 0.0029, respectively. Probability quality was supported by a mean Brier score of 0.0236 ± 0.0145 and a mean expected calibration error (ECE) of 0.0334 ± 0.0171. An ablation study (Models A–E) was also reported to quantify the incremental contribution of dual-view input, transformer fusion, and uncertainty calibration. Within the limits of this retrospective single-center setting, these results suggest that dual-view transformer fusion can provide strong discrimination while also producing calibrated probabilities and uncertainty outputs that are relevant for decision support.</p>
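	<p>The patient-grouped, stratified 5-fold protocol described above can be sketched with scikit-learn's StratifiedGroupKFold; the synthetic labels and patient identifiers below are placeholders, not the study's data.</p>
	<pre><code>
# Sketch of patient-grouped, stratified 5-fold splitting to prevent leakage across folds.
import numpy as np
from sklearn.model_selection import StratifiedGroupKFold

rng = np.random.default_rng(0)
labels = rng.integers(0, 2, size=200)          # 0 = Normal, 1 = Tumorous (placeholder)
patient_ids = rng.integers(0, 60, size=200)    # several acquisitions per patient
X = np.arange(200).reshape(-1, 1)              # stand-in for image indices

cv = StratifiedGroupKFold(n_splits=5, shuffle=True, random_state=42)
for fold, (train_idx, test_idx) in enumerate(cv.split(X, labels, groups=patient_ids)):
    shared = set(patient_ids[train_idx]).intersection(patient_ids[test_idx])
    print(f"fold {fold}: {len(shared)} patients shared across train/test")  # expected 0
</code></pre>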
	]]></content:encoded>

	<dc:title>Calibrated Transformer Fusion for Dual-View Low-Energy CESM Classification</dc:title>
			<dc:creator>Ahmed Alkurdi</dc:creator>
			<dc:creator>Amira Sallow</dc:creator>
		<dc:identifier>doi: 10.3390/jimaging12010041</dc:identifier>
	<dc:source>Journal of Imaging</dc:source>
	<dc:date>2026-01-13</dc:date>

	<prism:publicationName>Journal of Imaging</prism:publicationName>
	<prism:publicationDate>2026-01-13</prism:publicationDate>
	<prism:volume>12</prism:volume>
	<prism:number>1</prism:number>
	<prism:section>Article</prism:section>
	<prism:startingPage>41</prism:startingPage>
		<prism:doi>10.3390/jimaging12010041</prism:doi>
	<prism:url>https://www.mdpi.com/2313-433X/12/1/41</prism:url>
	
	<cc:license rdf:resource="CC BY 4.0"/>
</item>
        <item rdf:about="https://www.mdpi.com/2313-433X/12/1/40">

	<title>J. Imaging, Vol. 12, Pages 40: A Dual-UNet Diffusion Framework for Personalized Panoramic Generation</title>
	<link>https://www.mdpi.com/2313-433X/12/1/40</link>
	<description>While text-to-image and customized generation methods demonstrate strong capabilities in single-image generation, they fall short in supporting immersive applications that require coherent 360° panoramas. Conversely, existing panorama generation models lack customization capabilities. In panoramic scenes, reference objects often appear as minor background elements and may be multiple in number, while reference images across different views exhibit weak correlations. To address these challenges, we propose a diffusion-based framework for customized multi-view image generation. Our approach introduces a decoupled feature injection mechanism within a dual-UNet architecture to handle weakly correlated reference images, effectively integrating spatial information by concurrently feeding both reference images and noise into the denoising branch. A hybrid attention mechanism enables deep fusion of reference features and multi-view representations. Furthermore, a data augmentation strategy facilitates viewpoint-adaptive pose adjustments, and panoramic coordinates are employed to guide multi-view attention. The experimental results demonstrate our model’s effectiveness in generating coherent, high-quality customized multi-view images.</description>
	<pubDate>2026-01-11</pubDate>

	<content:encoded><![CDATA[
	<p><b>J. Imaging, Vol. 12, Pages 40: A Dual-UNet Diffusion Framework for Personalized Panoramic Generation</b></p>
	<p>Journal of Imaging <a href="https://www.mdpi.com/2313-433X/12/1/40">doi: 10.3390/jimaging12010040</a></p>
	<p>Authors:
		Jing Shen
		Leigang Huo
		Chunlei Huo
		Shiming Xiang
		</p>
	<p>While text-to-image and customized generation methods demonstrate strong capabilities in single-image generation, they fall short in supporting immersive applications that require coherent 360° panoramas. Conversely, existing panorama generation models lack customization capabilities. In panoramic scenes, reference objects often appear as minor background elements and may be multiple in number, while reference images across different views exhibit weak correlations. To address these challenges, we propose a diffusion-based framework for customized multi-view image generation. Our approach introduces a decoupled feature injection mechanism within a dual-UNet architecture to handle weakly correlated reference images, effectively integrating spatial information by concurrently feeding both reference images and noise into the denoising branch. A hybrid attention mechanism enables deep fusion of reference features and multi-view representations. Furthermore, a data augmentation strategy facilitates viewpoint-adaptive pose adjustments, and panoramic coordinates are employed to guide multi-view attention. The experimental results demonstrate our model’s effectiveness in generating coherent, high-quality customized multi-view images.</p>
	]]></content:encoded>

	<dc:title>A Dual-UNet Diffusion Framework for Personalized Panoramic Generation</dc:title>
			<dc:creator>Jing Shen</dc:creator>
			<dc:creator>Leigang Huo</dc:creator>
			<dc:creator>Chunlei Huo</dc:creator>
			<dc:creator>Shiming Xiang</dc:creator>
		<dc:identifier>doi: 10.3390/jimaging12010040</dc:identifier>
	<dc:source>Journal of Imaging</dc:source>
	<dc:date>2026-01-11</dc:date>

	<prism:publicationName>Journal of Imaging</prism:publicationName>
	<prism:publicationDate>2026-01-11</prism:publicationDate>
	<prism:volume>12</prism:volume>
	<prism:number>1</prism:number>
	<prism:section>Article</prism:section>
	<prism:startingPage>40</prism:startingPage>
		<prism:doi>10.3390/jimaging12010040</prism:doi>
	<prism:url>https://www.mdpi.com/2313-433X/12/1/40</prism:url>
	
	<cc:license rdf:resource="CC BY 4.0"/>
</item>
        <item rdf:about="https://www.mdpi.com/2313-433X/12/1/39">

	<title>J. Imaging, Vol. 12, Pages 39: Self-Supervised Learning of Deep Embeddings for Classification and Identification of Dental Implants</title>
	<link>https://www.mdpi.com/2313-433X/12/1/39</link>
	<description>This study proposes an automated system using deep learning-based object detection to identify implant systems, leveraging recent progress in self-supervised learning, specifically masked image modeling (MIM). We advocate for self-pre-training, emphasizing its advantages when acquiring suitable pre-training data is challenging. The proposed Masked Deep Embedding (MDE) pre-training method, extending the masked autoencoder (MAE) transformer, significantly enhances dental implant detection performance compared to baselines. Specifically, the proposed method achieves a best detection performance of AP = 96.1, outperforming supervised ViT and MAE baselines by up to +2.9 AP. In addition, we address the absence of a comprehensive dataset for implant design, enhancing an existing dataset under dental expert supervision. This augmentation includes annotations for implant design, such as coronal, middle, and apical parts, resulting in a unique Implant Design Dataset (IDD). The contributions encompass employing self-supervised learning for limited dental radiograph data, replacing MAE’s patch reconstruction with patch embeddings, achieving substantial performance improvement in implant detection, and expanding possibilities through the labeling of implant design. This study paves the way for AI-driven solutions in implant dentistry, providing valuable tools for dentists and patients facing implant-related challenges.</description>
	<pubDate>2026-01-09</pubDate>

	<content:encoded><![CDATA[
	<p><b>J. Imaging, Vol. 12, Pages 39: Self-Supervised Learning of Deep Embeddings for Classification and Identification of Dental Implants</b></p>
	<p>Journal of Imaging <a href="https://www.mdpi.com/2313-433X/12/1/39">doi: 10.3390/jimaging12010039</a></p>
	<p>Authors:
		Amani Almalki
		Abdulrahman Almalki
		Longin Jan Latecki
		</p>
	<p>This study proposes an automated system using deep learning-based object detection to identify implant systems, leveraging recent progress in self-supervised learning, specifically masked image modeling (MIM). We advocate for self-pre-training, emphasizing its advantages when acquiring suitable pre-training data is challenging. The proposed Masked Deep Embedding (MDE) pre-training method, extending the masked autoencoder (MAE) transformer, significantly enhances dental implant detection performance compared to baselines. Specifically, the proposed method achieves a best detection performance of AP = 96.1, outperforming supervised ViT and MAE baselines by up to +2.9 AP. In addition, we address the absence of a comprehensive dataset for implant design, enhancing an existing dataset under dental expert supervision. This augmentation includes annotations for implant design, such as coronal, middle, and apical parts, resulting in a unique Implant Design Dataset (IDD). The contributions encompass employing self-supervised learning for limited dental radiograph data, replacing MAE’s patch reconstruction with patch embeddings, achieving substantial performance improvement in implant detection, and expanding possibilities through the labeling of implant design. This study paves the way for AI-driven solutions in implant dentistry, providing valuable tools for dentists and patients facing implant-related challenges.</p>
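	<p>A loose sketch of the stated idea of regressing patch embeddings rather than raw pixels in masked pretraining; the shapes, masking ratio, frozen linear embedder, and random stand-in predictions below are all assumptions, not the MDE design.</p>
	<pre><code>
# Illustrative loss that targets patch embeddings (instead of pixels) on masked patches.
import torch
import torch.nn as nn

batch, num_patches = 8, 196
patch_dim, embed_dim = 16 * 16 * 3, 384                      # flattened 16x16 RGB patches
patches = torch.randn(batch, num_patches, patch_dim)

embedder = nn.Linear(patch_dim, embed_dim).requires_grad_(False)  # frozen embedding targets
decoder_out = torch.randn(batch, num_patches, embed_dim)      # stand-in for model predictions

masked = torch.rand(batch, num_patches) > 0.25                # roughly 75% of patches masked
targets = embedder(patches)
loss = ((decoder_out - targets) ** 2)[masked].mean()          # regression only on masked patches
print(loss.item())
</code></pre>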
	]]></content:encoded>

	<dc:title>Self-Supervised Learning of Deep Embeddings for Classification and Identification of Dental Implants</dc:title>
			<dc:creator>Amani Almalki</dc:creator>
			<dc:creator>Abdulrahman Almalki</dc:creator>
			<dc:creator>Longin Jan Latecki</dc:creator>
		<dc:identifier>doi: 10.3390/jimaging12010039</dc:identifier>
	<dc:source>Journal of Imaging</dc:source>
	<dc:date>2026-01-09</dc:date>

	<prism:publicationName>Journal of Imaging</prism:publicationName>
	<prism:publicationDate>2026-01-09</prism:publicationDate>
	<prism:volume>12</prism:volume>
	<prism:number>1</prism:number>
	<prism:section>Article</prism:section>
	<prism:startingPage>39</prism:startingPage>
		<prism:doi>10.3390/jimaging12010039</prism:doi>
	<prism:url>https://www.mdpi.com/2313-433X/12/1/39</prism:url>
	
	<cc:license rdf:resource="CC BY 4.0"/>
</item>
        <item rdf:about="https://www.mdpi.com/2313-433X/12/1/38">

	<title>J. Imaging, Vol. 12, Pages 38: SCT-Diff: Seamless Contextual Tracking via Diffusion Trajectory</title>
	<link>https://www.mdpi.com/2313-433X/12/1/38</link>
	<description>Existing detection-based trackers exploit temporal contexts by updating appearance models or modeling target motion. However, the sequential one-shot integration of temporal priors risks amplifying error accumulation, as frame-level template matching restricts comprehensive spatiotemporal analysis. To address this, we propose SCT-Diff, a video-level framework that holistically estimates target trajectories. Specifically, SCT-Diff processes video clips globally via a diffusion model to incorporate bidirectional spatiotemporal awareness, where reverse diffusion steps progressively refine noisy trajectory proposals into optimal predictions. Crucially, SCT-Diff enables iterative correction of historical trajectory hypotheses by observing future contexts within a sliding time window. This closed-loop feedback from future frames preserves temporal consistency and breaks the error propagation chain under complex appearance variations. For joint modeling of appearance and motion dynamics, we formulate trajectories as unified discrete token sequences. The designed Mamba-based expert decoder bridges visual features with language-formulated trajectories, enabling lightweight yet coherent sequence modeling. Extensive experiments demonstrate SCT-Diff’s superior efficiency and performance, achieving 75.4% AO on GOT-10k while maintaining real-time computational efficiency.</description>
	<pubDate>2026-01-09</pubDate>

	<content:encoded><![CDATA[
	<p><b>J. Imaging, Vol. 12, Pages 38: SCT-Diff: Seamless Contextual Tracking via Diffusion Trajectory</b></p>
	<p>Journal of Imaging <a href="https://www.mdpi.com/2313-433X/12/1/38">doi: 10.3390/jimaging12010038</a></p>
	<p>Authors:
		Guohao Nie
		Xingmei Wang
		Debin Zhang
		He Wang
		</p>
	<p>Existing detection-based trackers exploit temporal contexts by updating appearance models or modeling target motion. However, the sequential one-shot integration of temporal priors risks amplifying error accumulation, as frame-level template matching restricts comprehensive spatiotemporal analysis. To address this, we propose SCT-Diff, a video-level framework that holistically estimates target trajectories. Specifically, SCT-Diff processes video clips globally via a diffusion model to incorporate bidirectional spatiotemporal awareness, where reverse diffusion steps progressively refine noisy trajectory proposals into optimal predictions. Crucially, SCT-Diff enables iterative correction of historical trajectory hypotheses by observing future contexts within a sliding time window. This closed-loop feedback from future frames preserves temporal consistency and breaks the error propagation chain under complex appearance variations. For joint modeling of appearance and motion dynamics, we formulate trajectories as unified discrete token sequences. The designed Mamba-based expert decoder bridges visual features with language-formulated trajectories, enabling lightweight yet coherent sequence modeling. Extensive experiments demonstrate SCT-Diff’s superior efficiency and performance, achieving 75.4% AO on GOT-10k while maintaining real-time computational efficiency.</p>
	]]></content:encoded>

	<dc:title>SCT-Diff: Seamless Contextual Tracking via Diffusion Trajectory</dc:title>
			<dc:creator>Guohao Nie</dc:creator>
			<dc:creator>Xingmei Wang</dc:creator>
			<dc:creator>Debin Zhang</dc:creator>
			<dc:creator>He Wang</dc:creator>
		<dc:identifier>doi: 10.3390/jimaging12010038</dc:identifier>
	<dc:source>Journal of Imaging</dc:source>
	<dc:date>2026-01-09</dc:date>

	<prism:publicationName>Journal of Imaging</prism:publicationName>
	<prism:publicationDate>2026-01-09</prism:publicationDate>
	<prism:volume>12</prism:volume>
	<prism:number>1</prism:number>
	<prism:section>Article</prism:section>
	<prism:startingPage>38</prism:startingPage>
		<prism:doi>10.3390/jimaging12010038</prism:doi>
	<prism:url>https://www.mdpi.com/2313-433X/12/1/38</prism:url>
	
	<cc:license rdf:resource="CC BY 4.0"/>
</item>
        <item rdf:about="https://www.mdpi.com/2313-433X/12/1/37">

	<title>J. Imaging, Vol. 12, Pages 37: Degradation-Aware Multi-Stage Fusion for Underwater Image Enhancement</title>
	<link>https://www.mdpi.com/2313-433X/12/1/37</link>
	<description>Underwater images frequently suffer from color casts, low illumination, and blur due to wavelength-dependent absorption and scattering. We present a practical two-stage, modular, and degradation-aware framework designed for real-time enhancement, prioritizing deployability on edge devices. Stage I employs a lightweight CNN to classify inputs into three dominant degradation classes (color cast, low light, blur) with 91.85% accuracy on an EUVP subset. Stage II applies three scene-specific lightweight enhancement pipelines and fuses their outputs using two alternative learnable modules: a global Linear Fusion and a LiteUNetFusion (spatially adaptive weighting with optional residual correction). Compared to the three single-scene optimizers (average PSNR = 19.0 dB; mean UCIQE ≈ 0.597; mean UIQM ≈ 2.07), the Linear Fusion improves PSNR by +2.6 dB on average and yields roughly +20.7% in UCIQE and +21.0% in UIQM, while maintaining low latency (~90 ms per 640 × 480 frame on an Intel i5-13400F (Intel Corporation, Santa Clara, CA, USA)). The LiteUNetFusion further refines results: it raises PSNR by +1.5 dB over the Linear model (23.1 vs. 21.6 dB), brings modest perceptual gains (UCIQE from 0.72 to 0.74, UIQM 2.5 to 2.8) at a runtime of ≈125 ms per 640 × 480 frame, and better preserves local texture and color consistency in mixed-degradation scenes. We release implementation details for reproducibility and discuss limitations (e.g., occasional blur/noise amplification and domain generalization) together with future directions.</description>
	<pubDate>2026-01-08</pubDate>

	<content:encoded><![CDATA[
	<p><b>J. Imaging, Vol. 12, Pages 37: Degradation-Aware Multi-Stage Fusion for Underwater Image Enhancement</b></p>
	<p>Journal of Imaging <a href="https://www.mdpi.com/2313-433X/12/1/37">doi: 10.3390/jimaging12010037</a></p>
	<p>Authors:
		Lian Xie
		Hao Chen
		Jin Shu
		</p>
	<p>Underwater images frequently suffer from color casts, low illumination, and blur due to wavelength-dependent absorption and scattering. We present a practical two-stage, modular, and degradation-aware framework designed for real-time enhancement, prioritizing deployability on edge devices. Stage I employs a lightweight CNN to classify inputs into three dominant degradation classes (color cast, low light, blur) with 91.85% accuracy on an EUVP subset. Stage II applies three scene-specific lightweight enhancement pipelines and fuses their outputs using two alternative learnable modules: a global Linear Fusion and a LiteUNetFusion (spatially adaptive weighting with optional residual correction). Compared to the three single-scene optimizers (average PSNR = 19.0 dB; mean UCIQE ≈ 0.597; mean UIQM ≈ 2.07), the Linear Fusion improves PSNR by +2.6 dB on average and yields roughly +20.7% in UCIQE and +21.0% in UIQM, while maintaining low latency (~90 ms per 640 × 480 frame on an Intel i5-13400F (Intel Corporation, Santa Clara, CA, USA)). The LiteUNetFusion further refines results: it raises PSNR by +1.5 dB over the Linear model (23.1 vs. 21.6 dB), brings modest perceptual gains (UCIQE from 0.72 to 0.74, UIQM 2.5 to 2.8) at a runtime of ≈125 ms per 640 × 480 frame, and better preserves local texture and color consistency in mixed-degradation scenes. We release implementation details for reproducibility and discuss limitations (e.g., occasional blur/noise amplification and domain generalization) together with future directions.</p>
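	<p>A minimal sketch of a global linear fusion over the three branch outputs; the softmax-normalised learnable weights below are an assumption about the fusion form, not the paper's exact module.</p>
	<pre><code>
# Illustrative global linear fusion of three scene-specific enhancement outputs.
import torch
import torch.nn as nn

class LinearFusion(nn.Module):
    def __init__(self, num_branches=3):
        super().__init__()
        self.logits = nn.Parameter(torch.zeros(num_branches))  # learnable global weights

    def forward(self, outputs):                  # list of (B, 3, H, W) enhanced images
        w = torch.softmax(self.logits, dim=0)    # convex combination keeps a valid range
        stacked = torch.stack(outputs, dim=0)    # (num_branches, B, 3, H, W)
        return (w.view(-1, 1, 1, 1, 1) * stacked).sum(dim=0)

fusion = LinearFusion()
imgs = [torch.rand(1, 3, 480, 640) for _ in range(3)]
print(fusion(imgs).shape)                        # torch.Size([1, 3, 480, 640])
</code></pre>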
	]]></content:encoded>

	<dc:title>Degradation-Aware Multi-Stage Fusion for Underwater Image Enhancement</dc:title>
			<dc:creator>Lian Xie</dc:creator>
			<dc:creator>Hao Chen</dc:creator>
			<dc:creator>Jin Shu</dc:creator>
		<dc:identifier>doi: 10.3390/jimaging12010037</dc:identifier>
	<dc:source>Journal of Imaging</dc:source>
	<dc:date>2026-01-08</dc:date>

	<prism:publicationName>Journal of Imaging</prism:publicationName>
	<prism:publicationDate>2026-01-08</prism:publicationDate>
	<prism:volume>12</prism:volume>
	<prism:number>1</prism:number>
	<prism:section>Article</prism:section>
	<prism:startingPage>37</prism:startingPage>
		<prism:doi>10.3390/jimaging12010037</prism:doi>
	<prism:url>https://www.mdpi.com/2313-433X/12/1/37</prism:url>
	
	<cc:license rdf:resource="CC BY 4.0"/>
</item>
        <item rdf:about="https://www.mdpi.com/2313-433X/12/1/36">

	<title>J. Imaging, Vol. 12, Pages 36: A Hierarchical Deep Learning Architecture for Diagnosing Retinal Diseases Using Cross-Modal OCT to Fundus Translation in the Lack of Paired Data</title>
	<link>https://www.mdpi.com/2313-433X/12/1/36</link>
	<description>The paper focuses on automated diagnosis of retinal diseases, particularly Age-related Macular Degeneration (AMD) and diabetic retinopathy (DR), using optical coherence tomography (OCT), while addressing three key challenges: disease comorbidity, severe class imbalance, and the lack of strictly paired OCT and fundus data. We propose a hierarchical modular deep learning system designed for multi-label OCT screening with conditional routing to specialized staging modules. To enable DR staging when fundus images are unavailable, we use cross-modal alignment between OCT and fundus representations. This approach involves training a latent bridge that projects OCT embeddings into the fundus feature space. We enhance clinical reliability through per-class threshold calibration and implement quality control checks for OCT-only DR staging. Experiments demonstrate robust multi-label performance (macro-F1 = 0.989 ± 0.006 after per-class threshold calibration) and reliable calibration (ECE = 2.1 ± 0.4%), and OCT-only DR staging is feasible in 96.1% of cases that meet the quality control criterion.</description>
	<pubDate>2026-01-08</pubDate>

	<content:encoded><![CDATA[
	<p><b>J. Imaging, Vol. 12, Pages 36: A Hierarchical Deep Learning Architecture for Diagnosing Retinal Diseases Using Cross-Modal OCT to Fundus Translation in the Lack of Paired Data</b></p>
	<p>Journal of Imaging <a href="https://www.mdpi.com/2313-433X/12/1/36">doi: 10.3390/jimaging12010036</a></p>
	<p>Authors:
		Ekaterina A. Lopukhova
		Gulnaz M. Idrisova
		Timur R. Mukhamadeev
		Grigory S. Voronkov
		Ruslan V. Kutluyarov
		Elizaveta P. Topolskaya
		</p>
	<p>The paper focuses on automated diagnosis of retinal diseases, particularly Age-related Macular Degeneration (AMD) and diabetic retinopathy (DR), using optical coherence tomography (OCT), while addressing three key challenges: disease comorbidity, severe class imbalance, and the lack of strictly paired OCT and fundus data. We propose a hierarchical modular deep learning system designed for multi-label OCT screening with conditional routing to specialized staging modules. To enable DR staging when fundus images are unavailable, we use cross-modal alignment between OCT and fundus representations. This approach involves training a latent bridge that projects OCT embeddings into the fundus feature space. We enhance clinical reliability through per-class threshold calibration and implement quality control checks for OCT-only DR staging. Experiments demonstrate robust multi-label performance (macro-F1 = 0.989 ± 0.006 after per-class threshold calibration) and reliable calibration (ECE = 2.1 ± 0.4%), and OCT-only DR staging is feasible in 96.1% of cases that meet the quality control criterion.</p>
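	<p>An illustrative sketch of a latent bridge that projects OCT embeddings into a fundus feature space; the MLP shape, dimensions, and cosine alignment objective below are assumptions for illustration, not the paper's implementation.</p>
	<pre><code>
# Illustrative latent bridge mapping OCT embeddings toward a fundus feature space.
import torch
import torch.nn as nn
import torch.nn.functional as F

oct_dim, fundus_dim = 512, 768

bridge = nn.Sequential(
    nn.Linear(oct_dim, 1024), nn.ReLU(),
    nn.Linear(1024, fundus_dim),
)

oct_emb = torch.randn(16, oct_dim)        # stand-in embeddings from an OCT encoder
fundus_emb = torch.randn(16, fundus_dim)  # stand-in target fundus embeddings

projected = bridge(oct_emb)
loss = 1.0 - F.cosine_similarity(projected, fundus_emb, dim=1).mean()  # alignment loss
loss.backward()
print(loss.item())
</code></pre>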
	]]></content:encoded>

	<dc:title>A Hierarchical Deep Learning Architecture for Diagnosing Retinal Diseases Using Cross-Modal OCT to Fundus Translation in the Lack of Paired Data</dc:title>
			<dc:creator>Ekaterina A. Lopukhova</dc:creator>
			<dc:creator>Gulnaz M. Idrisova</dc:creator>
			<dc:creator>Timur R. Mukhamadeev</dc:creator>
			<dc:creator>Grigory S. Voronkov</dc:creator>
			<dc:creator>Ruslan V. Kutluyarov</dc:creator>
			<dc:creator>Elizaveta P. Topolskaya</dc:creator>
		<dc:identifier>doi: 10.3390/jimaging12010036</dc:identifier>
	<dc:source>Journal of Imaging</dc:source>
	<dc:date>2026-01-08</dc:date>

	<prism:publicationName>Journal of Imaging</prism:publicationName>
	<prism:publicationDate>2026-01-08</prism:publicationDate>
	<prism:volume>12</prism:volume>
	<prism:number>1</prism:number>
	<prism:section>Article</prism:section>
	<prism:startingPage>36</prism:startingPage>
		<prism:doi>10.3390/jimaging12010036</prism:doi>
	<prism:url>https://www.mdpi.com/2313-433X/12/1/36</prism:url>
	
	<cc:license rdf:resource="CC BY 4.0"/>
</item>
        <item rdf:about="https://www.mdpi.com/2313-433X/12/1/35">

	<title>J. Imaging, Vol. 12, Pages 35: Comparison of the Radiomics Features of Normal-Appearing White Matter in Persons with High or Low Perivascular Space Scores</title>
	<link>https://www.mdpi.com/2313-433X/12/1/35</link>
	<description>The clinical significance of perivascular spaces (PVS) remains controversial. Radiomics refers to the extraction of quantitative features from medical images using pixel-based computational approaches. This study aimed to compare the radiomics features of normal-appearing white matter (NAWM) in patients with low and high PVS scores to reveal microstructural differences that are not visible macroscopically. Adult patients who underwent cranial MRI over a one-month period were retrospectively screened and divided into two groups according to their global PVS score. Radiomics feature extraction from NAWM was performed at the level of the centrum semiovale on FLAIR and ADC images. Radiomics features were selected using Least Absolute Shrinkage and Selection Operator (LASSO) regression during the initial model development phase, and predefined radiomics scores were evaluated for both sequences. A total of 160 patients were included in the study. Radiomics scores derived from normal-appearing white matter demonstrated good discriminative performance for differentiating high vs. low perivascular space (PVS) burden (AUC = 0.853 for FLAIR and AUC = 0.753 for ADC). In age- and scanner-adjusted multivariable models, radiomics scores remained independently associated with high PVS burden. These findings suggest that radiomics analysis of NAWM can capture subtle white matter alterations associated with PVS burden and may serve as a non-invasive biomarker for early detection of microvascular and inflammatory changes.</description>
	<pubDate>2026-01-08</pubDate>

	<content:encoded><![CDATA[
	<p><b>J. Imaging, Vol. 12, Pages 35: Comparison of the Radiomics Features of Normal-Appearing White Matter in Persons with High or Low Perivascular Space Scores</b></p>
	<p>Journal of Imaging <a href="https://www.mdpi.com/2313-433X/12/1/35">doi: 10.3390/jimaging12010035</a></p>
	<p>Authors:
		Onural Ozturk
		Sibel Balci
		Seda Ozturk
		</p>
	<p>The clinical significance of perivascular spaces (PVS) remains controversial. Radiomics refers to the extraction of quantitative features from medical images using pixel-based computational approaches. This study aimed to compare the radiomics features of normal-appearing white matter (NAWM) in patients with low and high PVS scores to reveal microstructural differences that are not visible macroscopically. Adult patients who underwent cranial MRI over a one-month period were retrospectively screened and divided into two groups according to their global PVS score. Radiomics feature extraction from NAWM was performed at the level of the centrum semiovale on FLAIR and ADC images. Radiomics features were selected using Least Absolute Shrinkage and Selection Operator (LASSO) regression during the initial model development phase, and predefined radiomics scores were evaluated for both sequences. A total of 160 patients were included in the study. Radiomics scores derived from normal-appearing white matter demonstrated good discriminative performance for differentiating high vs. low perivascular space (PVS) burden (AUC = 0.853 for FLAIR and AUC = 0.753 for ADC). In age- and scanner-adjusted multivariable models, radiomics scores remained independently associated with high PVS burden. These findings suggest that radiomics analysis of NAWM can capture subtle white matter alterations associated with PVS burden and may serve as a non-invasive biomarker for early detection of microvascular and inflammatory changes.</p>
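	<p>A small scikit-learn sketch of LASSO-based feature selection followed by a linear radiomics score, as described above; the synthetic feature matrix, labels, and cross-validation settings are placeholders, not the study's data or tuning.</p>
	<pre><code>
# Sketch of LASSO feature selection and a linear radiomics score on synthetic data.
import numpy as np
from sklearn.linear_model import LassoCV
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import roc_auc_score

rng = np.random.default_rng(0)
X = rng.normal(size=(160, 100))                  # 160 patients, 100 radiomics features
y = rng.integers(0, 2, size=160)                 # 1 = high PVS burden, 0 = low (placeholder)

Xs = StandardScaler().fit_transform(X)
lasso = LassoCV(cv=5, random_state=0).fit(Xs, y)

selected = np.flatnonzero(lasso.coef_)           # features surviving the L1 penalty
rad_score = Xs @ lasso.coef_ + lasso.intercept_  # linear radiomics score per patient
print(len(selected), roc_auc_score(y, rad_score))  # AUC is near 0.5 on this random data
</code></pre>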
	]]></content:encoded>

	<dc:title>Comparison of the Radiomics Features of Normal-Appearing White Matter in Persons with High or Low Perivascular Space Scores</dc:title>
			<dc:creator>Onural Ozturk</dc:creator>
			<dc:creator>Sibel Balci</dc:creator>
			<dc:creator>Seda Ozturk</dc:creator>
		<dc:identifier>doi: 10.3390/jimaging12010035</dc:identifier>
	<dc:source>Journal of Imaging</dc:source>
	<dc:date>2026-01-08</dc:date>

	<prism:publicationName>Journal of Imaging</prism:publicationName>
	<prism:publicationDate>2026-01-08</prism:publicationDate>
	<prism:volume>12</prism:volume>
	<prism:number>1</prism:number>
	<prism:section>Communication</prism:section>
	<prism:startingPage>35</prism:startingPage>
		<prism:doi>10.3390/jimaging12010035</prism:doi>
	<prism:url>https://www.mdpi.com/2313-433X/12/1/35</prism:url>
	
	<cc:license rdf:resource="CC BY 4.0"/>
</item>
        <item rdf:about="https://www.mdpi.com/2313-433X/12/1/34">

	<title>J. Imaging, Vol. 12, Pages 34: Empirical Evaluation of UNet for Segmentation of Applicable Surfaces for Seismic Sensor Installation</title>
	<link>https://www.mdpi.com/2313-433X/12/1/34</link>
	<description>The deployment of wireless seismic nodal systems necessitates the efficient identification of optimal locations for sensor installation, considering factors such as ground stability and the absence of interference. Semantic segmentation of satellite imagery has advanced significantly, yet its application to this specific task remains unexplored. This work presents a baseline empirical evaluation of the U-Net architecture for the semantic segmentation of surfaces applicable for seismic sensor installation. We utilize a novel dataset of Sentinel-2 multispectral images, specifically labeled for this purpose. The study investigates the impact of pretrained encoders (EfficientNetB2, Cross-Stage Partial Darknet53 (CSPDarknet53), and Multi-Axis Vision Transformer (MAxViT)), different combinations of Sentinel-2 spectral bands (Red, Green, Blue (RGB), RGB+Near Infrared (NIR), 10-bands with 10 and 20 m/pix spatial resolution, full 13-band), and a technique for improving small object segmentation by modifying the input convolutional layer stride. Experimental results demonstrate that the CSPDarknet53 encoder generally outperforms the others (IoU = 0.534, Precision = 0.716, Recall = 0.635). The combination of RGB and Near-Infrared bands (10 m/pixel resolution) yielded the most robust performance across most configurations. Reducing the input stride from 2 to 1 proved beneficial for segmenting small linear objects like roads. The findings establish a baseline for this novel task and provide practical insights for optimizing deep learning models in the context of automated seismic nodal network installation planning.</description>
	<pubDate>2026-01-08</pubDate>

	<content:encoded><![CDATA[
	<p><b>J. Imaging, Vol. 12, Pages 34: Empirical Evaluation of UNet for Segmentation of Applicable Surfaces for Seismic Sensor Installation</b></p>
	<p>Journal of Imaging <a href="https://www.mdpi.com/2313-433X/12/1/34">doi: 10.3390/jimaging12010034</a></p>
	<p>Authors:
		Mikhail Uzdiaev
		Marina Astapova
		Andrey Ronzhin
		Aleksandra Figurek
		</p>
	<p>The deployment of wireless seismic nodal systems necessitates the efficient identification of optimal locations for sensor installation, considering factors such as ground stability and the absence of interference. Semantic segmentation of satellite imagery has advanced significantly, yet its application to this specific task remains unexplored. This work presents a baseline empirical evaluation of the U-Net architecture for the semantic segmentation of surfaces applicable for seismic sensor installation. We utilize a novel dataset of Sentinel-2 multispectral images, specifically labeled for this purpose. The study investigates the impact of pretrained encoders (EfficientNetB2, Cross-Stage Partial Darknet53 (CSPDarknet53), and Multi-Axis Vision Transformer (MAxViT)), different combinations of Sentinel-2 spectral bands (Red, Green, Blue (RGB), RGB+Near Infrared (NIR), 10-bands with 10 and 20 m/pix spatial resolution, full 13-band), and a technique for improving small object segmentation by modifying the input convolutional layer stride. Experimental results demonstrate that the CSPDarknet53 encoder generally outperforms the others (IoU = 0.534, Precision = 0.716, Recall = 0.635). The combination of RGB and Near-Infrared bands (10 m/pixel resolution) yielded the most robust performance across most configurations. Reducing the input stride from 2 to 1 proved beneficial for segmenting small linear objects like roads. The findings establish a baseline for this novel task and provide practical insights for optimizing deep learning models in the context of automated seismic nodal network installation planning.</p>
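	<p>The stride modification can be illustrated with a toy PyTorch stem; the 4-channel input and layer sizes below are stand-ins for the pretrained encoders evaluated in the paper, not their actual architectures.</p>
	<pre><code>
# Reducing the first convolution's stride from 2 to 1 keeps full spatial resolution,
# which helps small, thin objects (e.g., roads) survive the early downsampling.
import torch
import torch.nn as nn

stem = nn.Conv2d(4, 32, kernel_size=3, stride=2, padding=1)  # e.g. RGB+NIR input (toy stem)
x = torch.randn(1, 4, 256, 256)
print(stem(x).shape)            # torch.Size([1, 32, 128, 128])

stem.stride = (1, 1)            # input stride changed from 2 to 1
print(stem(x).shape)            # torch.Size([1, 32, 256, 256])
</code></pre>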
	]]></content:encoded>

	<dc:title>Empirical Evaluation of UNet for Segmentation of Applicable Surfaces for Seismic Sensor Installation</dc:title>
			<dc:creator>Mikhail Uzdiaev</dc:creator>
			<dc:creator>Marina Astapova</dc:creator>
			<dc:creator>Andrey Ronzhin</dc:creator>
			<dc:creator>Aleksandra Figurek</dc:creator>
		<dc:identifier>doi: 10.3390/jimaging12010034</dc:identifier>
	<dc:source>Journal of Imaging</dc:source>
	<dc:date>2026-01-08</dc:date>

	<prism:publicationName>Journal of Imaging</prism:publicationName>
	<prism:publicationDate>2026-01-08</prism:publicationDate>
	<prism:volume>12</prism:volume>
	<prism:number>1</prism:number>
	<prism:section>Article</prism:section>
	<prism:startingPage>34</prism:startingPage>
		<prism:doi>10.3390/jimaging12010034</prism:doi>
	<prism:url>https://www.mdpi.com/2313-433X/12/1/34</prism:url>
	
	<cc:license rdf:resource="CC BY 4.0"/>
</item>
        <item rdf:about="https://www.mdpi.com/2313-433X/12/1/33">

	<title>J. Imaging, Vol. 12, Pages 33: A Unified Complex-Fresnel Model for Physically Based Long-Wave Infrared Imaging and Simulation</title>
	<link>https://www.mdpi.com/2313-433X/12/1/33</link>
	<description>Accurate modelling of reflection, transmission, absorption, and emission at material interfaces is essential for infrared imaging, rendering, and the simulation of optical and sensing systems. This need is particularly pronounced across the short-wave to long-wave infrared (SWIR–LWIR) spectrum, where many materials exhibit dispersion- and wavelength-dependent attenuation described by complex refractive indices. In this work, we introduce a unified formulation of the full Fresnel equations that directly incorporates wavelength-dependent complex refractive-index data and provides physically consistent interface behaviour for both dielectrics and conductors. The approach reformulates the classical Fresnel expressions to eliminate sign ambiguities and numerical instabilities, resulting in a stable evaluation across incidence angles and for strongly absorbing materials. We demonstrate the model through spectral-rendering simulations that illustrate realistic reflectance and transmittance behaviour for materials with different infrared optical properties. To assess its suitability for thermal-infrared applications, we also compare the simulated long-wave emission of a heated glass sphere with measurements from a LWIR camera. The agreement between measured and simulated radiometric trends indicates that the proposed formulation offers a practical and physically grounded tool for wavelength-parametric interface modelling in infrared imaging, supporting applications in spectral rendering, synthetic data generation, and infrared system analysis.</description>
	<pubDate>2026-01-07</pubDate>

	<content:encoded><![CDATA[
	<p><b>J. Imaging, Vol. 12, Pages 33: A Unified Complex-Fresnel Model for Physically Based Long-Wave Infrared Imaging and Simulation</b></p>
	<p>Journal of Imaging <a href="https://www.mdpi.com/2313-433X/12/1/33">doi: 10.3390/jimaging12010033</a></p>
	<p>Authors:
		Peter ter Heerdt
		William Keustermans
		Ivan De Boi
		Steve Vanlanduit
		</p>
	<p>Accurate modelling of reflection, transmission, absorption, and emission at material interfaces is essential for infrared imaging, rendering, and the simulation of optical and sensing systems. This need is particularly pronounced across the short-wave to long-wave infrared (SWIR–LWIR) spectrum, where many materials exhibit dispersion- and wavelength-dependent attenuation described by complex refractive indices. In this work, we introduce a unified formulation of the full Fresnel equations that directly incorporates wavelength-dependent complex refractive-index data and provides physically consistent interface behaviour for both dielectrics and conductors. The approach reformulates the classical Fresnel expressions to eliminate sign ambiguities and numerical instabilities, resulting in a stable evaluation across incidence angles and for strongly absorbing materials. We demonstrate the model through spectral-rendering simulations that illustrate realistic reflectance and transmittance behaviour for materials with different infrared optical properties. To assess its suitability for thermal-infrared applications, we also compare the simulated long-wave emission of a heated glass sphere with measurements from a LWIR camera. The agreement between measured and simulated radiometric trends indicates that the proposed formulation offers a practical and physically grounded tool for wavelength-parametric interface modelling in infrared imaging, supporting applications in spectral rendering, synthetic data generation, and infrared system analysis.</p>
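	<p>A minimal NumPy sketch of Fresnel reflectance with a complex refractive index, using the complex square root to stay well behaved for absorbing media; the example index value and the unpolarised average are illustrative choices, not the paper's full formulation.</p>
	<pre><code>
# Fresnel power reflectance at an air-material interface with complex n2 = n + ik.
import numpy as np

def fresnel_reflectance(n1, n2, theta_i):
    cos_i = np.cos(theta_i)
    sin_t = n1 / n2 * np.sin(theta_i)             # Snell's law (complex-valued for absorbers)
    cos_t = np.sqrt(1.0 - sin_t ** 2 + 0j)        # complex sqrt avoids instabilities
    r_s = (n1 * cos_i - n2 * cos_t) / (n1 * cos_i + n2 * cos_t)
    r_p = (n2 * cos_i - n1 * cos_t) / (n2 * cos_i + n1 * cos_t)
    return 0.5 * (abs(r_s) ** 2 + abs(r_p) ** 2)  # unpolarised average

# Illustrative complex index (not measured data) at 30 degrees incidence.
R = fresnel_reflectance(1.0, 2.2 + 0.4j, np.deg2rad(30.0))
print(R, 1.0 - R)   # for an opaque sample, 1 - R approximates the directional emissivity
</code></pre>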
	]]></content:encoded>

	<dc:title>A Unified Complex-Fresnel Model for Physically Based Long-Wave Infrared Imaging and Simulation</dc:title>
			<dc:creator>Peter ter Heerdt</dc:creator>
			<dc:creator>William Keustermans</dc:creator>
			<dc:creator>Ivan De Boi</dc:creator>
			<dc:creator>Steve Vanlanduit</dc:creator>
		<dc:identifier>doi: 10.3390/jimaging12010033</dc:identifier>
	<dc:source>Journal of Imaging</dc:source>
	<dc:date>2026-01-07</dc:date>

	<prism:publicationName>Journal of Imaging</prism:publicationName>
	<prism:publicationDate>2026-01-07</prism:publicationDate>
	<prism:volume>12</prism:volume>
	<prism:number>1</prism:number>
	<prism:section>Article</prism:section>
	<prism:startingPage>33</prism:startingPage>
		<prism:doi>10.3390/jimaging12010033</prism:doi>
	<prism:url>https://www.mdpi.com/2313-433X/12/1/33</prism:url>
	
	<cc:license rdf:resource="CC BY 4.0"/>
</item>
        <item rdf:about="https://www.mdpi.com/2313-433X/12/1/31">

	<title>J. Imaging, Vol. 12, Pages 31: Deep Learning-Assisted Autofocus for Aerial Cameras in Maritime Photography</title>
	<link>https://www.mdpi.com/2313-433X/12/1/31</link>
	<description>To address the unreliable autofocus problem of drone-mounted visible-light aerial cameras in low-contrast maritime environments, this paper proposes an autofocus system that combines deep-learning-based coarse focusing with traditional search-based fine adjustment. The system uses a built-in high-contrast resolution test chart as the signal source. Images captured by the imaging sensor are fed into a lightweight convolutional neural network to regress the defocus distance, enabling fast focus positioning. This avoids the weak signal and inaccurate focusing often encountered when adjusting focus directly on low-contrast sea surfaces. In the fine-focusing stage, a hybrid strategy integrating hill-climbing search and inverse correction is adopted. By evaluating the image sharpness function, the system accurately locks onto the optimal focal plane, forming intelligent closed-loop control. Experiments show that this method, which combines imaging of the built-in calibration target with deep-learning-based coarse focusing, significantly improves focusing efficiency. Compared with traditional full-range search strategies, the focusing speed is increased by approximately 60%. While ensuring high accuracy and strong adaptability, the proposed approach effectively enhances the overall imaging performance of aerial cameras in low-contrast maritime conditions.</description>
	<pubDate>2026-01-07</pubDate>

	<content:encoded><![CDATA[
	<p><b>J. Imaging, Vol. 12, Pages 31: Deep Learning-Assisted Autofocus for Aerial Cameras in Maritime Photography</b></p>
	<p>Journal of Imaging <a href="https://www.mdpi.com/2313-433X/12/1/31">doi: 10.3390/jimaging12010031</a></p>
	<p>Authors:
		Haiying Liu
		Yingchao Li
		Shilong Xu
		Haoyu Wang
		Qiang Fu
		Huilin Jiang
		</p>
	<p>To address the unreliable autofocus problem of drone-mounted visible-light aerial cameras in low-contrast maritime environments, this paper proposes an autofocus system that combines deep-learning-based coarse focusing with traditional search-based fine adjustment. The system uses a built-in high-contrast resolution test chart as the signal source. Images captured by the imaging sensor are fed into a lightweight convolutional neural network to regress the defocus distance, enabling fast focus positioning. This avoids the weak signal and inaccurate focusing often encountered when adjusting focus directly on low-contrast sea surfaces. In the fine-focusing stage, a hybrid strategy integrating hill-climbing search and inverse correction is adopted. By evaluating the image sharpness function, the system accurately locks onto the optimal focal plane, forming intelligent closed-loop control. Experiments show that this method, which combines imaging of the built-in calibration target with deep-learning-based coarse focusing, significantly improves focusing efficiency. Compared with traditional full-range search strategies, the focusing speed is increased by approximately 60%. While ensuring high accuracy and strong adaptability, the proposed approach effectively enhances the overall imaging performance of aerial cameras in low-contrast maritime conditions.</p>
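	<p>A toy sketch of the fine-focusing idea: hill-climb an image-sharpness score over focus positions and halve the step when the score stops improving; the capture() camera model and the gradient-based sharpness metric are placeholders, not the system described in the paper.</p>
	<pre><code>
# Toy hill-climbing autofocus over a simulated focus axis.
import numpy as np

def sharpness(img):
    gy, gx = np.gradient(img.astype(float))
    return float(np.mean(gx ** 2 + gy ** 2))       # higher value = sharper image

def capture(pos):                                   # placeholder camera: best focus at 37
    rng = np.random.default_rng(pos)
    blur = abs(pos - 37)
    return rng.normal(scale=1.0 / (1.0 + blur), size=(64, 64))

def hill_climb(start, step=4, min_step=1):
    pos, best = start, sharpness(capture(start))
    while step >= min_step:
        for candidate in (pos + step, pos - step):  # try both directions
            s = sharpness(capture(candidate))
            if s > best:
                pos, best = candidate, s
                break
        else:
            step //= 2                              # overshoot: refine with a smaller step
    return pos

print(hill_climb(start=10))                         # converges near the toy optimum (37)
</code></pre>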
	]]></content:encoded>

	<dc:title>Deep Learning-Assisted Autofocus for Aerial Cameras in Maritime Photography</dc:title>
			<dc:creator>Haiying Liu</dc:creator>
			<dc:creator>Yingchao Li</dc:creator>
			<dc:creator>Shilong Xu</dc:creator>
			<dc:creator>Haoyu Wang</dc:creator>
			<dc:creator>Qiang Fu</dc:creator>
			<dc:creator>Huilin Jiang</dc:creator>
		<dc:identifier>doi: 10.3390/jimaging12010031</dc:identifier>
	<dc:source>Journal of Imaging</dc:source>
	<dc:date>2026-01-07</dc:date>

	<prism:publicationName>Journal of Imaging</prism:publicationName>
	<prism:publicationDate>2026-01-07</prism:publicationDate>
	<prism:volume>12</prism:volume>
	<prism:number>1</prism:number>
	<prism:section>Article</prism:section>
	<prism:startingPage>31</prism:startingPage>
		<prism:doi>10.3390/jimaging12010031</prism:doi>
	<prism:url>https://www.mdpi.com/2313-433X/12/1/31</prism:url>
	
	<cc:license rdf:resource="CC BY 4.0"/>
</item>
        <item rdf:about="https://www.mdpi.com/2313-433X/12/1/30">

	<title>J. Imaging, Vol. 12, Pages 30: From Visual to Multimodal: Systematic Ablation of Encoders and Fusion Strategies in Animal Identification</title>
	<link>https://www.mdpi.com/2313-433X/12/1/30</link>
	<description>Automated animal identification is a practical task for reuniting lost pets with their owners, yet current systems often struggle due to limited dataset scale and reliance on unimodal visual cues. This study introduces a multimodal verification framework that enhances visual features with semantic identity priors derived from synthetic textual descriptions. We constructed a massive training corpus of 1.9 million photographs covering 695,091 unique animals to support this investigation. Through systematic ablation studies, we identified SigLIP2-Giant and E5-Small-v2 as the optimal vision and text backbones. We further evaluated fusion strategies ranging from simple concatenation to adaptive gating to determine the best method for integrating these modalities. Our proposed approach utilizes a gated fusion mechanism and achieved a Top-1 accuracy of 84.28% and an Equal Error Rate of 0.0422 on a comprehensive test protocol. These results represent an 11% improvement over leading unimodal baselines and demonstrate that integrating synthesized semantic descriptions significantly refines decision boundaries in large-scale pet re-identification.</description>
	<pubDate>2026-01-07</pubDate>

	<content:encoded><![CDATA[
	<p><b>J. Imaging, Vol. 12, Pages 30: From Visual to Multimodal: Systematic Ablation of Encoders and Fusion Strategies in Animal Identification</b></p>
	<p>Journal of Imaging <a href="https://www.mdpi.com/2313-433X/12/1/30">doi: 10.3390/jimaging12010030</a></p>
	<p>Authors:
		Vasiliy Kudryavtsev
		Kirill Borodin
		German Berezin
		Kirill Bubenchikov
		Grach Mkrtchian
		Alexander Ryzhkov
		</p>
	<p>Automated animal identification is a practical task for reuniting lost pets with their owners, yet current systems often struggle due to limited dataset scale and reliance on unimodal visual cues. This study introduces a multimodal verification framework that enhances visual features with semantic identity priors derived from synthetic textual descriptions. We constructed a massive training corpus of 1.9 million photographs covering 695,091 unique animals to support this investigation. Through systematic ablation studies, we identified SigLIP2-Giant and E5-Small-v2 as the optimal vision and text backbones. We further evaluated fusion strategies ranging from simple concatenation to adaptive gating to determine the best method for integrating these modalities. Our proposed approach utilizes a gated fusion mechanism and achieved a Top-1 accuracy of 84.28% and an Equal Error Rate of 0.0422 on a comprehensive test protocol. These results represent an 11% improvement over leading unimodal baselines and demonstrate that integrating synthesized semantic descriptions significantly refines decision boundaries in large-scale pet re-identification.</p>
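	<p>An illustrative gated fusion of a vision embedding and a text embedding into a joint identity descriptor; the projection dimensions and single sigmoid gate below are assumptions, not the exact module evaluated in the paper.</p>
	<pre><code>
# Illustrative gated fusion of visual and semantic (text) embeddings.
import torch
import torch.nn as nn

class GatedFusion(nn.Module):
    def __init__(self, img_dim=1536, txt_dim=384, out_dim=512):
        super().__init__()
        self.img_proj = nn.Linear(img_dim, out_dim)
        self.txt_proj = nn.Linear(txt_dim, out_dim)
        self.gate = nn.Sequential(nn.Linear(2 * out_dim, out_dim), nn.Sigmoid())

    def forward(self, img_emb, txt_emb):
        v, t = self.img_proj(img_emb), self.txt_proj(txt_emb)
        g = self.gate(torch.cat([v, t], dim=-1))   # per-dimension mixing weights
        return g * v + (1.0 - g) * t               # adaptive visual/semantic balance

fusion = GatedFusion()
print(fusion(torch.randn(4, 1536), torch.randn(4, 384)).shape)  # torch.Size([4, 512])
</code></pre>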
	]]></content:encoded>

	<dc:title>From Visual to Multimodal: Systematic Ablation of Encoders and Fusion Strategies in Animal Identification</dc:title>
			<dc:creator>Vasiliy Kudryavtsev</dc:creator>
			<dc:creator>Kirill Borodin</dc:creator>
			<dc:creator>German Berezin</dc:creator>
			<dc:creator>Kirill Bubenchikov</dc:creator>
			<dc:creator>Grach Mkrtchian</dc:creator>
			<dc:creator>Alexander Ryzhkov</dc:creator>
		<dc:identifier>doi: 10.3390/jimaging12010030</dc:identifier>
	<dc:source>Journal of Imaging</dc:source>
	<dc:date>2026-01-07</dc:date>

	<prism:publicationName>Journal of Imaging</prism:publicationName>
	<prism:publicationDate>2026-01-07</prism:publicationDate>
	<prism:volume>12</prism:volume>
	<prism:number>1</prism:number>
	<prism:section>Article</prism:section>
	<prism:startingPage>30</prism:startingPage>
		<prism:doi>10.3390/jimaging12010030</prism:doi>
	<prism:url>https://www.mdpi.com/2313-433X/12/1/30</prism:url>
	
	<cc:license rdf:resource="CC BY 4.0"/>
</item>
        <item rdf:about="https://www.mdpi.com/2313-433X/12/1/32">

	<title>J. Imaging, Vol. 12, Pages 32: Hybrid Skeleton-Based Motion Templates for Cross-View and Appearance-Robust Gait Recognition</title>
	<link>https://www.mdpi.com/2313-433X/12/1/32</link>
	<description>Gait recognition methods based on silhouette templates, such as the Gait Energy Image (GEI), achieve high accuracy under controlled conditions but often degrade when appearance varies due to viewpoint, clothing, or carried objects. In contrast, skeleton-based approaches provide interpretable motion cues but remain sensitive to pose-estimation noise. This work proposes two compact 2D skeletal descriptors—Gait Skeleton Images (GSIs)—that encode 3D joint trajectories into line-based and joint-based static templates compatible with standard 2D CNN architectures. A unified processing pipeline is introduced, including skeletal topology normalization, rigid view alignment, orthographic projection, and pixel-level rendering. Core design factors are analyzed on the GRIDDS dataset, where depth-based 3D coordinates provide stable ground truth for evaluating structural choices and rendering parameters. An extensive evaluation is then conducted on the widely used CASIA-B dataset, using 3D coordinates estimated via human pose estimation, to assess robustness under viewpoint, clothing, and carrying covariates. Results show that although GEIs achieve the highest same-view accuracy, GSI variants exhibit reduced degradation under appearance changes and demonstrate greater stability under severe cross-view conditions. These findings indicate that compact skeletal templates can complement appearance-based descriptors and may benefit further from continued advances in 3D human pose estimation.</description>
	<pubDate>2026-01-07</pubDate>

	<content:encoded><![CDATA[
	<p><b>J. Imaging, Vol. 12, Pages 32: Hybrid Skeleton-Based Motion Templates for Cross-View and Appearance-Robust Gait Recognition</b></p>
	<p>Journal of Imaging <a href="https://www.mdpi.com/2313-433X/12/1/32">doi: 10.3390/jimaging12010032</a></p>
	<p>Authors:
		João Ferreira Nunes
		Pedro Miguel Moreira
		João Manuel R. S. Tavares
		</p>
	<p>Gait recognition methods based on silhouette templates, such as the Gait Energy Image (GEI), achieve high accuracy under controlled conditions but often degrade when appearance varies due to viewpoint, clothing, or carried objects. In contrast, skeleton-based approaches provide interpretable motion cues but remain sensitive to pose-estimation noise. This work proposes two compact 2D skeletal descriptors—Gait Skeleton Images (GSIs)—that encode 3D joint trajectories into line-based and joint-based static templates compatible with standard 2D CNN architectures. A unified processing pipeline is introduced, including skeletal topology normalization, rigid view alignment, orthographic projection, and pixel-level rendering. Core design factors are analyzed on the GRIDDS dataset, where depth-based 3D coordinates provide stable ground truth for evaluating structural choices and rendering parameters. An extensive evaluation is then conducted on the widely used CASIA-B dataset, using 3D coordinates estimated via human pose estimation, to assess robustness under viewpoint, clothing, and carrying covariates. Results show that although GEIs achieve the highest same-view accuracy, GSI variants exhibit reduced degradation under appearance changes and demonstrate greater stability under severe cross-view conditions. These findings indicate that compact skeletal templates can complement appearance-based descriptors and may benefit further from continued advances in 3D human pose estimation.</p>
	]]></content:encoded>

	<dc:title>Hybrid Skeleton-Based Motion Templates for Cross-View and Appearance-Robust Gait Recognition</dc:title>
			<dc:creator>João Ferreira Nunes</dc:creator>
			<dc:creator>Pedro Miguel Moreira</dc:creator>
			<dc:creator>João Manuel R. S. Tavares</dc:creator>
		<dc:identifier>doi: 10.3390/jimaging12010032</dc:identifier>
	<dc:source>Journal of Imaging</dc:source>
	<dc:date>2026-01-07</dc:date>

	<prism:publicationName>Journal of Imaging</prism:publicationName>
	<prism:publicationDate>2026-01-07</prism:publicationDate>
	<prism:volume>12</prism:volume>
	<prism:number>1</prism:number>
	<prism:section>Article</prism:section>
	<prism:startingPage>32</prism:startingPage>
		<prism:doi>10.3390/jimaging12010032</prism:doi>
	<prism:url>https://www.mdpi.com/2313-433X/12/1/32</prism:url>
	
	<cc:license rdf:resource="CC BY 4.0"/>
</item>
        <item rdf:about="https://www.mdpi.com/2313-433X/12/1/29">

	<title>J. Imaging, Vol. 12, Pages 29: DynMultiDep: A Dynamic Multimodal Fusion and Multi-Scale Time Series Modeling Approach for Depression Detection</title>
	<link>https://www.mdpi.com/2313-433X/12/1/29</link>
	<description>Depression is a prevalent mental disorder that imposes a significant public health burden worldwide. Although multimodal detection methods have shown potential, existing techniques still face two critical bottlenecks: (i) insufficient integration of global patterns and local fluctuations in long-sequence modeling and (ii) static fusion strategies that fail to dynamically adapt to the complementarity and redundancy among modalities. To address these challenges, this paper proposes a dynamic multimodal depression detection framework, DynMultiDep, which combines multi-scale temporal modeling with an adaptive fusion mechanism. The core innovations of DynMultiDep lie in its Multi-scale Temporal Experts Module (MTEM) and Dynamic Multimodal Fusion module (DynMM). On one hand, MTEM employs Mamba experts to extract long-term trend features and utilizes local-window Transformers to capture short-term dynamic fluctuations, achieving adaptive fusion through a long-short routing mechanism. On the other hand, DynMM introduces modality-level and fusion-level dynamic decision-making, selecting critical modality paths and optimizing cross-modal interaction strategies based on input characteristics. The experimental results demonstrate that DynMultiDep outperforms existing state-of-the-art methods in detection performance on two widely used large-scale depression datasets.</description>
	<pubDate>2026-01-06</pubDate>

	<content:encoded><![CDATA[
	<p><b>J. Imaging, Vol. 12, Pages 29: DynMultiDep: A Dynamic Multimodal Fusion and Multi-Scale Time Series Modeling Approach for Depression Detection</b></p>
	<p>Journal of Imaging <a href="https://www.mdpi.com/2313-433X/12/1/29">doi: 10.3390/jimaging12010029</a></p>
	<p>Authors:
		Jincheng Li
		Menglin Zheng
		Jiongyi Yang
		Yihui Zhan
		Xing Xie
		</p>
	<p>Depression is a prevalent mental disorder that imposes a significant public health burden worldwide. Although multimodal detection methods have shown potential, existing techniques still face two critical bottlenecks: (i) insufficient integration of global patterns and local fluctuations in long-sequence modeling and (ii) static fusion strategies that fail to dynamically adapt to the complementarity and redundancy among modalities. To address these challenges, this paper proposes a dynamic multimodal depression detection framework, DynMultiDep, which combines multi-scale temporal modeling with an adaptive fusion mechanism. The core innovations of DynMultiDep lie in its Multi-scale Temporal Experts Module (MTEM) and Dynamic Multimodal Fusion module (DynMM). On one hand, MTEM employs Mamba experts to extract long-term trend features and utilizes local-window Transformers to capture short-term dynamic fluctuations, achieving adaptive fusion through a long-short routing mechanism. On the other hand, DynMM introduces modality-level and fusion-level dynamic decision-making, selecting critical modality paths and optimizing cross-modal interaction strategies based on input characteristics. The experimental results demonstrate that DynMultiDep outperforms existing state-of-the-art methods in detection performance on two widely used large-scale depression datasets.</p>
	]]></content:encoded>

	<dc:title>DynMultiDep: A Dynamic Multimodal Fusion and Multi-Scale Time Series Modeling Approach for Depression Detection</dc:title>
			<dc:creator>Jincheng Li</dc:creator>
			<dc:creator>Menglin Zheng</dc:creator>
			<dc:creator>Jiongyi Yang</dc:creator>
			<dc:creator>Yihui Zhan</dc:creator>
			<dc:creator>Xing Xie</dc:creator>
		<dc:identifier>doi: 10.3390/jimaging12010029</dc:identifier>
	<dc:source>Journal of Imaging</dc:source>
	<dc:date>2026-01-06</dc:date>

	<prism:publicationName>Journal of Imaging</prism:publicationName>
	<prism:publicationDate>2026-01-06</prism:publicationDate>
	<prism:volume>12</prism:volume>
	<prism:number>1</prism:number>
	<prism:section>Article</prism:section>
	<prism:startingPage>29</prism:startingPage>
		<prism:doi>10.3390/jimaging12010029</prism:doi>
	<prism:url>https://www.mdpi.com/2313-433X/12/1/29</prism:url>
	
	<cc:license rdf:resource="CC BY 4.0"/>
</item>
        <item rdf:about="https://www.mdpi.com/2313-433X/12/1/28">

	<title>J. Imaging, Vol. 12, Pages 28: Ultrashort Echo Time Quantitative Susceptibility Source Separation in Musculoskeletal System: A Feasibility Study</title>
	<link>https://www.mdpi.com/2313-433X/12/1/28</link>
	<description>This study aims to demonstrate the feasibility of ultrashort echo time (UTE)-based susceptibility source separation for musculoskeletal (MSK) imaging, enabling discrimination between diamagnetic and paramagnetic tissue components, with a particular focus on hemophilic arthropathy (HA). Three key techniques were integrated to achieve UTE-based susceptibility source separation: Iterative decomposition of water and fat with echo asymmetry and least-squares estimation for B0 field estimation, projection onto dipole fields for local field mapping, and χ-separation for quantitative susceptibility mapping (QSM) with source decomposition. A phantom containing varying concentrations of diamagnetic (CaCO3) and paramagnetic (Fe3O4) materials was used to validate the method. In addition, in vivo UTE-QSM scans of the knees and ankles were performed on five HA patients using a 3T clinical MRI scanner. In the phantom, conventional QSM underestimated susceptibility values due to the cancellation effect of the mixed sources. In contrast, source-separated maps provided distinct diamagnetic and paramagnetic susceptibility values that correlated strongly with CaCO3 and Fe3O4 concentrations (r = −0.99 and 0.95, p &lt; 0.05). In vivo, paramagnetic maps enabled improved visualization of hemosiderin deposits in joints of HA patients, which were poorly visualized or obscured in conventional QSM due to susceptibility cancellation by surrounding diamagnetic tissues such as bone. This study demonstrates, for the first time, the feasibility of UTE-based quantitative susceptibility source separation for MSK applications. The approach enhances the detection of paramagnetic substances like hemosiderin in HA and offers potential for improved assessment of bone and joint tissue composition.</description>
	<pubDate>2026-01-06</pubDate>

	<content:encoded><![CDATA[
	<p><b>J. Imaging, Vol. 12, Pages 28: Ultrashort Echo Time Quantitative Susceptibility Source Separation in Musculoskeletal System: A Feasibility Study</b></p>
	<p>Journal of Imaging <a href="https://www.mdpi.com/2313-433X/12/1/28">doi: 10.3390/jimaging12010028</a></p>
	<p>Authors:
		Sam Sedaghat
		Jin Il Park
		Eddie Fu
		Annette von Drygalski
		Yajun Ma
		Eric Y. Chang
		Jiang Du
		Lorenzo Nardo
		Hyungseok Jang
		</p>
	<p>This study aims to demonstrate the feasibility of ultrashort echo time (UTE)-based susceptibility source separation for musculoskeletal (MSK) imaging, enabling discrimination between diamagnetic and paramagnetic tissue components, with a particular focus on hemophilic arthropathy (HA). Three key techniques were integrated to achieve UTE-based susceptibility source separation: Iterative decomposition of water and fat with echo asymmetry and least-squares estimation for B0 field estimation, projection onto dipole fields for local field mapping, and χ-separation for quantitative susceptibility mapping (QSM) with source decomposition. A phantom containing varying concentrations of diamagnetic (CaCO3) and paramagnetic (Fe3O4) materials was used to validate the method. In addition, in vivo UTE-QSM scans of the knees and ankles were performed on five HA patients using a 3T clinical MRI scanner. In the phantom, conventional QSM underestimated susceptibility values due to the cancellation effect of the mixed sources. In contrast, source-separated maps provided distinct diamagnetic and paramagnetic susceptibility values that correlated strongly with CaCO3 and Fe3O4 concentrations (r = −0.99 and 0.95, p &lt; 0.05). In vivo, paramagnetic maps enabled improved visualization of hemosiderin deposits in joints of HA patients, which were poorly visualized or obscured in conventional QSM due to susceptibility cancellation by surrounding diamagnetic tissues such as bone. This study demonstrates, for the first time, the feasibility of UTE-based quantitative susceptibility source separation for MSK applications. The approach enhances the detection of paramagnetic substances like hemosiderin in HA and offers potential for improved assessment of bone and joint tissue composition.</p>
	]]></content:encoded>

	<dc:title>Ultrashort Echo Time Quantitative Susceptibility Source Separation in Musculoskeletal System: A Feasibility Study</dc:title>
			<dc:creator>Sam Sedaghat</dc:creator>
			<dc:creator>Jin Il Park</dc:creator>
			<dc:creator>Eddie Fu</dc:creator>
			<dc:creator>Annette von Drygalski</dc:creator>
			<dc:creator>Yajun Ma</dc:creator>
			<dc:creator>Eric Y. Chang</dc:creator>
			<dc:creator>Jiang Du</dc:creator>
			<dc:creator>Lorenzo Nardo</dc:creator>
			<dc:creator>Hyungseok Jang</dc:creator>
		<dc:identifier>doi: 10.3390/jimaging12010028</dc:identifier>
	<dc:source>Journal of Imaging</dc:source>
	<dc:date>2026-01-06</dc:date>

	<prism:publicationName>Journal of Imaging</prism:publicationName>
	<prism:publicationDate>2026-01-06</prism:publicationDate>
	<prism:volume>12</prism:volume>
	<prism:number>1</prism:number>
	<prism:section>Article</prism:section>
	<prism:startingPage>28</prism:startingPage>
		<prism:doi>10.3390/jimaging12010028</prism:doi>
	<prism:url>https://www.mdpi.com/2313-433X/12/1/28</prism:url>
	
	<cc:license rdf:resource="CC BY 4.0"/>
</item>
        <item rdf:about="https://www.mdpi.com/2313-433X/12/1/27">

	<title>J. Imaging, Vol. 12, Pages 27: Vision-Based People Counting and Tracking for Urban Environments</title>
	<link>https://www.mdpi.com/2313-433X/12/1/27</link>
	<description>Population growth and the expansion of urban areas increase the need for intelligent passenger traffic monitoring systems. Accurate estimation of the number of passengers is an important condition for improving the efficiency, safety and quality of transport services. This paper proposes an approach to the automatic detection and counting of people using computer vision and deep learning methods. While YOLOv8 and DeepSORT have been widely explored individually, our contribution lies in a task-specific modification of the DeepSORT tracking pipeline, optimized for dense passenger environments, strong occlusions, and dynamic lighting, as well as in a unified architecture that integrates detection, tracking, and automatic event-log generation. On our new proprietary dataset of 4047 images and 8918 labeled objects, the system achieved 92% detection accuracy and 85% counting accuracy, which confirms the effectiveness of the solution. Compared to Mask R-CNN and DETR, the YOLOv8 model demonstrates an optimal balance between speed, accuracy, and computational efficiency. The results confirm that computer vision can become an efficient and scalable replacement for traditional sensor-based passenger counting systems. The developed architecture (YOLO + Tracking) combines recognition, tracking and counting of people into a single system that automatically generates annotated video streams and event logs. In the future, it is planned to expand the dataset, introduce support for multicamera integration, and adapt the model for embedded devices to improve the accuracy and energy efficiency of the solution in real-world conditions.</description>
	<pubDate>2026-01-05</pubDate>

	<content:encoded><![CDATA[
	<p><b>J. Imaging, Vol. 12, Pages 27: Vision-Based People Counting and Tracking for Urban Environments</b></p>
	<p>Journal of Imaging <a href="https://www.mdpi.com/2313-433X/12/1/27">doi: 10.3390/jimaging12010027</a></p>
	<p>Authors:
		Daniyar Nurseitov
		Kairat Bostanbekov
		Nazgul Toiganbayeva
		Aidana Zhalgas
		Didar Yedilkhan
		Beibut Amirgaliyev
		</p>
	<p>Population growth and the expansion of urban areas increase the need for intelligent passenger traffic monitoring systems. Accurate estimation of the number of passengers is an important condition for improving the efficiency, safety and quality of transport services. This paper proposes an approach to the automatic detection and counting of people using computer vision and deep learning methods. While YOLOv8 and DeepSORT have been widely explored individually, our contribution lies in a task-specific modification of the DeepSORT tracking pipeline, optimized for dense passenger environments, strong occlusions, and dynamic lighting, as well as in a unified architecture that integrates detection, tracking, and automatic event-log generation. On our new proprietary dataset of 4047 images and 8918 labeled objects, the system achieved 92% detection accuracy and 85% counting accuracy, which confirms the effectiveness of the solution. Compared to Mask R-CNN and DETR, the YOLOv8 model demonstrates an optimal balance between speed, accuracy, and computational efficiency. The results confirm that computer vision can become an efficient and scalable replacement for traditional sensor-based passenger counting systems. The developed architecture (YOLO + Tracking) combines recognition, tracking and counting of people into a single system that automatically generates annotated video streams and event logs. In the future, it is planned to expand the dataset, introduce support for multicamera integration, and adapt the model for embedded devices to improve the accuracy and energy efficiency of the solution in real-world conditions.</p>
	]]></content:encoded>

	<dc:title>Vision-Based People Counting and Tracking for Urban Environments</dc:title>
			<dc:creator>Daniyar Nurseitov</dc:creator>
			<dc:creator>Kairat Bostanbekov</dc:creator>
			<dc:creator>Nazgul Toiganbayeva</dc:creator>
			<dc:creator>Aidana Zhalgas</dc:creator>
			<dc:creator>Didar Yedilkhan</dc:creator>
			<dc:creator>Beibut Amirgaliyev</dc:creator>
		<dc:identifier>doi: 10.3390/jimaging12010027</dc:identifier>
	<dc:source>Journal of Imaging</dc:source>
	<dc:date>2026-01-05</dc:date>

	<prism:publicationName>Journal of Imaging</prism:publicationName>
	<prism:publicationDate>2026-01-05</prism:publicationDate>
	<prism:volume>12</prism:volume>
	<prism:number>1</prism:number>
	<prism:section>Article</prism:section>
	<prism:startingPage>27</prism:startingPage>
		<prism:doi>10.3390/jimaging12010027</prism:doi>
	<prism:url>https://www.mdpi.com/2313-433X/12/1/27</prism:url>
	
	<cc:license rdf:resource="CC BY 4.0"/>
</item>
        <item rdf:about="https://www.mdpi.com/2313-433X/12/1/26">

	<title>J. Imaging, Vol. 12, Pages 26: A Hierarchical Multi-Resolution Self-Supervised Framework for High-Fidelity 3D Face Reconstruction Using Learnable Gabor-Aware Texture Modeling</title>
	<link>https://www.mdpi.com/2313-433X/12/1/26</link>
	<description>High-fidelity 3D face reconstruction from a single image is challenging, owing to inherently ambiguous depth cues and the strong entanglement of multi-scale facial textures. To address this, we propose a hierarchical multi-resolution self-supervised framework (HMR-Framework), which progressively reconstructs coarse-, medium-, and fine-scale facial geometry through a unified pipeline. A coarse geometric prior is first estimated via 3D morphable model regression, followed by medium-scale refinement using a vertex deformation map constrained by a global–local Markov random field loss to preserve structural coherence. To improve fine-scale fidelity, a learnable Gabor-aware texture enhancement module is proposed to decouple spatial–frequency information and thus improve sensitivity to high-frequency facial attributes. Additionally, we employ a wavelet-based detail perception loss to preserve edge-aware texture features while mitigating noise commonly observed in in-the-wild images. Extensive qualitative and quantitative evaluations on benchmark datasets indicate that the proposed framework provides better fine-detail reconstruction than existing state-of-the-art methods, while maintaining robustness to pose variations. Notably, the hierarchical design increases semantic consistency across multiple geometric scales, providing a functional solution for high-fidelity 3D face reconstruction from monocular images.</description>
	<pubDate>2026-01-05</pubDate>

	<content:encoded><![CDATA[
	<p><b>J. Imaging, Vol. 12, Pages 26: A Hierarchical Multi-Resolution Self-Supervised Framework for High-Fidelity 3D Face Reconstruction Using Learnable Gabor-Aware Texture Modeling</b></p>
	<p>Journal of Imaging <a href="https://www.mdpi.com/2313-433X/12/1/26">doi: 10.3390/jimaging12010026</a></p>
	<p>Authors:
		Pichet Mareo
		Rerkchai Fooprateepsiri
		</p>
	<p>High-fidelity 3D face reconstruction from a single image is challenging, owing to inherently ambiguous depth cues and the strong entanglement of multi-scale facial textures. To address this, we propose a hierarchical multi-resolution self-supervised framework (HMR-Framework), which progressively reconstructs coarse-, medium-, and fine-scale facial geometry through a unified pipeline. A coarse geometric prior is first estimated via 3D morphable model regression, followed by medium-scale refinement using a vertex deformation map constrained by a global–local Markov random field loss to preserve structural coherence. To improve fine-scale fidelity, a learnable Gabor-aware texture enhancement module is proposed to decouple spatial–frequency information and thus improve sensitivity to high-frequency facial attributes. Additionally, we employ a wavelet-based detail perception loss to preserve edge-aware texture features while mitigating noise commonly observed in in-the-wild images. Extensive qualitative and quantitative evaluations on benchmark datasets indicate that the proposed framework provides better fine-detail reconstruction than existing state-of-the-art methods, while maintaining robustness to pose variations. Notably, the hierarchical design increases semantic consistency across multiple geometric scales, providing a functional solution for high-fidelity 3D face reconstruction from monocular images.</p>
	]]></content:encoded>

	<dc:title>A Hierarchical Multi-Resolution Self-Supervised Framework for High-Fidelity 3D Face Reconstruction Using Learnable Gabor-Aware Texture Modeling</dc:title>
			<dc:creator>Pichet Mareo</dc:creator>
			<dc:creator>Rerkchai Fooprateepsiri</dc:creator>
		<dc:identifier>doi: 10.3390/jimaging12010026</dc:identifier>
	<dc:source>Journal of Imaging</dc:source>
	<dc:date>2026-01-05</dc:date>

	<prism:publicationName>Journal of Imaging</prism:publicationName>
	<prism:publicationDate>2026-01-05</prism:publicationDate>
	<prism:volume>12</prism:volume>
	<prism:number>1</prism:number>
	<prism:section>Article</prism:section>
	<prism:startingPage>26</prism:startingPage>
		<prism:doi>10.3390/jimaging12010026</prism:doi>
	<prism:url>https://www.mdpi.com/2313-433X/12/1/26</prism:url>
	
	<cc:license rdf:resource="CC BY 4.0"/>
</item>
        <item rdf:about="https://www.mdpi.com/2313-433X/12/1/25">

	<title>J. Imaging, Vol. 12, Pages 25: A Slicer-Independent Framework for Measuring G-Code Accuracy in Medical 3D Printing</title>
	<link>https://www.mdpi.com/2313-433X/12/1/25</link>
	<description>In medical 3D printing, accuracy is critical for fabricating patient-specific implants and anatomical models. Although printer performance has been widely examined, the influence of slicing software on geometric fidelity is less frequently quantified. The slicing step, which converts STL files into printer-readable G-code, may introduce deviations that affect the final printed object. This study aims to quantify slicer-induced G-code deviations by comparing G-code-derived geometries with their reference STL models. Twenty mandibular models were processed using five slicers (PrusaSlicer (version 2.9.1), Cura (version 5.2.2), Simplify3D (version 4.1.2), Slic3r (version 1.3.0), and Fusion 360 (version 2.0.19725)). A custom Python workflow converted the G-code into point clouds and reconstructed STL meshes through XY and Z corrections, marching cubes surface extraction, and volumetric extrusion. A calibration object enabled coordinate normalization across slicers. Accuracy was assessed using Mean Surface Distance (MSD), Root Mean Square (RMS) deviation, and Volume Difference. MSD ranged from 0.071 to 0.095 mm, and RMS deviation from 0.084 to 0.113 mm, depending on the slicer. Volumetric differences were slicer-dependent. PrusaSlicer yielded the highest surface accuracy; Simplify3D and Slic3r showed the best repeatability. Fusion 360 produced the largest deviations. The slicers introduced geometric deviations below 0.1 mm that represent a substantial proportion of the overall error in the FDM workflow.</description>
	<pubDate>2026-01-04</pubDate>

	<content:encoded><![CDATA[
	<p><b>J. Imaging, Vol. 12, Pages 25: A Slicer-Independent Framework for Measuring G-Code Accuracy in Medical 3D Printing</b></p>
	<p>Journal of Imaging <a href="https://www.mdpi.com/2313-433X/12/1/25">doi: 10.3390/jimaging12010025</a></p>
	<p>Authors:
		Michel Beyer
		Alexandru Burde
		Andreas E. Roser
		Maximiliane Beyer
		Sead Abazi
		Florian M. Thieringer
		</p>
	<p>In medical 3D printing, accuracy is critical for fabricating patient-specific implants and anatomical models. Although printer performance has been widely examined, the influence of slicing software on geometric fidelity is less frequently quantified. The slicing step, which converts STL files into printer-readable G-code, may introduce deviations that affect the final printed object. This study aims to quantify slicer-induced G-code deviations by comparing G-code-derived geometries with their reference STL models. Twenty mandibular models were processed using five slicers (PrusaSlicer (version 2.9.1), Cura (version 5.2.2), Simplify3D (version 4.1.2), Slic3r (version 1.3.0), and Fusion 360 (version 2.0.19725)). A custom Python workflow converted the G-code into point clouds and reconstructed STL meshes through XY and Z corrections, marching cubes surface extraction, and volumetric extrusion. A calibration object enabled coordinate normalization across slicers. Accuracy was assessed using Mean Surface Distance (MSD), Root Mean Square (RMS) deviation, and Volume Difference. MSD ranged from 0.071 to 0.095 mm, and RMS deviation from 0.084 to 0.113 mm, depending on the slicer. Volumetric differences were slicer-dependent. PrusaSlicer yielded the highest surface accuracy; Simplify3D and Slic3r showed the best repeatability. Fusion 360 produced the largest deviations. The slicers introduced geometric deviations below 0.1 mm that represent a substantial proportion of the overall error in the FDM workflow.</p>
	]]></content:encoded>

	<dc:title>A Slicer-Independent Framework for Measuring G-Code Accuracy in Medical 3D Printing</dc:title>
			<dc:creator>Michel Beyer</dc:creator>
			<dc:creator>Alexandru Burde</dc:creator>
			<dc:creator>Andreas E. Roser</dc:creator>
			<dc:creator>Maximiliane Beyer</dc:creator>
			<dc:creator>Sead Abazi</dc:creator>
			<dc:creator>Florian M. Thieringer</dc:creator>
		<dc:identifier>doi: 10.3390/jimaging12010025</dc:identifier>
	<dc:source>Journal of Imaging</dc:source>
	<dc:date>2026-01-04</dc:date>

	<prism:publicationName>Journal of Imaging</prism:publicationName>
	<prism:publicationDate>2026-01-04</prism:publicationDate>
	<prism:volume>12</prism:volume>
	<prism:number>1</prism:number>
	<prism:section>Article</prism:section>
	<prism:startingPage>25</prism:startingPage>
		<prism:doi>10.3390/jimaging12010025</prism:doi>
	<prism:url>https://www.mdpi.com/2313-433X/12/1/25</prism:url>
	
	<cc:license rdf:resource="CC BY 4.0"/>
</item>
        <item rdf:about="https://www.mdpi.com/2313-433X/12/1/24">

	<title>J. Imaging, Vol. 12, Pages 24: LLM-Based Pose Normalization and Multimodal Fusion for Facial Expression Recognition in Extreme Poses</title>
	<link>https://www.mdpi.com/2313-433X/12/1/24</link>
	<description>Facial expression recognition (FER) technology has progressively matured over time. However, existing FER methods are primarily optimized for frontal face images, and their recognition accuracy significantly degrades when processing profile or large-angle rotated facial images. Consequently, this limitation hinders the practical deployment of FER systems. To mitigate the interference caused by large pose variations and improve recognition accuracy, we propose an FER method based on profile-to-frontal transformation and multimodal learning. Specifically, we first leverage the visual understanding and generation capabilities of Qwen-Image-Edit to transform profile images into frontal viewpoints, preserving key expression features while standardizing facial poses. Second, we introduce the CLIP model to enhance the semantic representation capability of expression features through vision–language joint learning. Qualitative and quantitative experiments on the RAF (89.39%), EXPW (67.17%), and AffectNet-7 (62.66%) datasets demonstrate that our method outperforms existing approaches.</description>
	<pubDate>2026-01-04</pubDate>

	<content:encoded><![CDATA[
	<p><b>J. Imaging, Vol. 12, Pages 24: LLM-Based Pose Normalization and Multimodal Fusion for Facial Expression Recognition in Extreme Poses</b></p>
	<p>Journal of Imaging <a href="https://www.mdpi.com/2313-433X/12/1/24">doi: 10.3390/jimaging12010024</a></p>
	<p>Authors:
		Bohan Chen
		Bowen Qu
		Yu Zhou
		Han Huang
		Jianing Guo
		Yanning Xian
		Longxiang Ma
		Jinxuan Yu
		Jingyu Chen
		</p>
	<p>Facial expression recognition (FER) technology has progressively matured over time. However, existing FER methods are primarily optimized for frontal face images, and their recognition accuracy significantly degrades when processing profile or large-angle rotated facial images. Consequently, this limitation hinders the practical deployment of FER systems. To mitigate the interference caused by large pose variations and improve recognition accuracy, we propose an FER method based on profile-to-frontal transformation and multimodal learning. Specifically, we first leverage the visual understanding and generation capabilities of Qwen-Image-Edit to transform profile images into frontal viewpoints, preserving key expression features while standardizing facial poses. Second, we introduce the CLIP model to enhance the semantic representation capability of expression features through vision–language joint learning. Qualitative and quantitative experiments on the RAF (89.39%), EXPW (67.17%), and AffectNet-7 (62.66%) datasets demonstrate that our method outperforms existing approaches.</p>
	]]></content:encoded>

	<dc:title>LLM-Based Pose Normalization and Multimodal Fusion for Facial Expression Recognition in Extreme Poses</dc:title>
			<dc:creator>Bohan Chen</dc:creator>
			<dc:creator>Bowen Qu</dc:creator>
			<dc:creator>Yu Zhou</dc:creator>
			<dc:creator>Han Huang</dc:creator>
			<dc:creator>Jianing Guo</dc:creator>
			<dc:creator>Yanning Xian</dc:creator>
			<dc:creator>Longxiang Ma</dc:creator>
			<dc:creator>Jinxuan Yu</dc:creator>
			<dc:creator>Jingyu Chen</dc:creator>
		<dc:identifier>doi: 10.3390/jimaging12010024</dc:identifier>
	<dc:source>Journal of Imaging</dc:source>
	<dc:date>2026-01-04</dc:date>

	<prism:publicationName>Journal of Imaging</prism:publicationName>
	<prism:publicationDate>2026-01-04</prism:publicationDate>
	<prism:volume>12</prism:volume>
	<prism:number>1</prism:number>
	<prism:section>Article</prism:section>
	<prism:startingPage>24</prism:startingPage>
		<prism:doi>10.3390/jimaging12010024</prism:doi>
	<prism:url>https://www.mdpi.com/2313-433X/12/1/24</prism:url>
	
	<cc:license rdf:resource="CC BY 4.0"/>
</item>
        <item rdf:about="https://www.mdpi.com/2313-433X/12/1/23">

	<title>J. Imaging, Vol. 12, Pages 23: State of the Art of Remote Sensing Data: Gradient Pattern in Pseudocolor Composite Images</title>
	<link>https://www.mdpi.com/2313-433X/12/1/23</link>
	<description>The thematic processing of pseudocolor composite images, especially those created from remote sensing data, is of considerable interest. The set of spectral classes comprising such images is typically described by a nominal scale, meaning the absence of any predetermined relationships between the classes. However, in many cases, images of this type may contain elements of a regular spatial order, one variant of which is a gradient structure. Gradient structures are characterized by a certain regular spatial ordering of spectral classes. Recognizing gradient patterns in the structure of pseudocolor composite images opens up new possibilities for deeper thematic image processing. This article describes an algorithm for analyzing the spatial structure of a pseudocolor composite image to identify gradient patterns. In this process, the initial nominal scale of spectral classes is transformed into a rank scale of the gradient legend. The algorithm is based on the analysis of Moore neighborhoods for each image pixel. This produces an array of the prevalence of all types of local binary patterns (the pixel’s nearest neighbors). All possible variants of the spectral class rank scale composition are then considered. The rank scale variant that describes the largest proportion of image pixels within its gradient order is used as the final result. The user can independently define the criteria for the significance of the gradient order in the analyzed image, focusing either on the overall statistics of the proportion of pixels consistent with the spatial structure of the selected gradient or on the statistics of a selected key image region. The proposed algorithm is illustrated through the analysis of test examples.</description>
	<pubDate>2026-01-04</pubDate>

	<content:encoded><![CDATA[
	<p><b>J. Imaging, Vol. 12, Pages 23: State of the Art of Remote Sensing Data: Gradient Pattern in Pseudocolor Composite Images</b></p>
	<p>Journal of Imaging <a href="https://www.mdpi.com/2313-433X/12/1/23">doi: 10.3390/jimaging12010023</a></p>
	<p>Authors:
		Alexey Terekhov
		Ravil I. Mukhamediev
		Igor Savin
		</p>
	<p>The thematic processing of pseudocolor composite images, especially those created from remote sensing data, is of considerable interest. The set of spectral classes comprising such images is typically described by a nominal scale, meaning the absence of any predetermined relationships between the classes. However, in many cases, images of this type may contain elements of a regular spatial order, one variant of which is a gradient structure. Gradient structures are characterized by a certain regular spatial ordering of spectral classes. Recognizing gradient patterns in the structure of pseudocolor composite images opens up new possibilities for deeper thematic image processing. This article describes an algorithm for analyzing the spatial structure of a pseudocolor composite image to identify gradient patterns. In this process, the initial nominal scale of spectral classes is transformed into a rank scale of the gradient legend. The algorithm is based on the analysis of Moore neighborhoods for each image pixel. This produces an array of the prevalence of all types of local binary patterns (the pixel’s nearest neighbors). All possible variants of the spectral class rank scale composition are then considered. The rank scale variant that describes the largest proportion of image pixels within its gradient order is used as the final result. The user can independently define the criteria for the significance of the gradient order in the analyzed image, focusing either on the overall statistics of the proportion of pixels consistent with the spatial structure of the selected gradient or on the statistics of a selected key image region. The proposed algorithm is illustrated through the analysis of test examples.</p>
	]]></content:encoded>

	<dc:title>State of the Art of Remote Sensing Data: Gradient Pattern in Pseudocolor Composite Images</dc:title>
			<dc:creator>Alexey Terekhov</dc:creator>
			<dc:creator>Ravil I. Mukhamediev</dc:creator>
			<dc:creator>Igor Savin</dc:creator>
		<dc:identifier>doi: 10.3390/jimaging12010023</dc:identifier>
	<dc:source>Journal of Imaging</dc:source>
	<dc:date>2026-01-04</dc:date>

	<prism:publicationName>Journal of Imaging</prism:publicationName>
	<prism:publicationDate>2026-01-04</prism:publicationDate>
	<prism:volume>12</prism:volume>
	<prism:number>1</prism:number>
	<prism:section>Article</prism:section>
	<prism:startingPage>23</prism:startingPage>
		<prism:doi>10.3390/jimaging12010023</prism:doi>
	<prism:url>https://www.mdpi.com/2313-433X/12/1/23</prism:url>
	
	<cc:license rdf:resource="CC BY 4.0"/>
</item>
        <item rdf:about="https://www.mdpi.com/2313-433X/12/1/22">

	<title>J. Imaging, Vol. 12, Pages 22: Comparative Evaluation of Vision–Language Models for Detecting and Localizing Dental Lesions from Intraoral Images</title>
	<link>https://www.mdpi.com/2313-433X/12/1/22</link>
	<description>This study assesses the efficiency of vision–language models in detecting and classifying carious and non-carious lesions from intraoral photo imaging. A dataset of 172 annotated images was classified for microcavitation, cavitated lesions, staining, calculus, and non-carious lesions. Florence-2, PaLI-Gemma, and YOLOv8 models were trained on the dataset. The dataset was divided into an 80:10:10 split, and model performance was evaluated using mean average precision (mAP), mAP50-95, and class-specific precision and recall. YOLOv8 outperformed the vision–language models, achieving a mean average precision (mAP) of 37% with a precision of 42.3% (with 100% for cavitation detection) and 31.3% recall. PaLI-Gemma produced recall values of 13% and 21%. Florence-2 yielded a mean average precision of 10%, with precision and recall of 51% and 35%, respectively. YOLOv8 achieved the strongest overall performance. Florence-2 and PaLI-Gemma underperformed relative to YOLOv8 despite their potential for multimodal contextual understanding, highlighting the need for larger, more diverse datasets and hybrid architectures to achieve improved performance.</description>
	<pubDate>2026-01-03</pubDate>

	<content:encoded><![CDATA[
	<p><b>J. Imaging, Vol. 12, Pages 22: Comparative Evaluation of Vision–Language Models for Detecting and Localizing Dental Lesions from Intraoral Images</b></p>
	<p>Journal of Imaging <a href="https://www.mdpi.com/2313-433X/12/1/22">doi: 10.3390/jimaging12010022</a></p>
	<p>Authors:
		Maria Jahan
		Al Ibne Siam
		Lamim Zakir Pronay
		Saif Ahmed
		Nabeel Mohammed
		James Dudley
		Taseef Hasan Farook
		</p>
	<p>This study assesses the efficiency of vision–language models in detecting and classifying carious and non-carious lesions from intraoral photo imaging. A dataset of 172 annotated images was classified for microcavitation, cavitated lesions, staining, calculus, and non-carious lesions. Florence-2, PaLI-Gemma, and YOLOv8 models were trained on the dataset. The dataset was divided into an 80:10:10 split, and model performance was evaluated using mean average precision (mAP), mAP50-95, and class-specific precision and recall. YOLOv8 outperformed the vision–language models, achieving a mean average precision (mAP) of 37% with a precision of 42.3% (with 100% for cavitation detection) and 31.3% recall. PaLI-Gemma produced recall values of 13% and 21%. Florence-2 yielded a mean average precision of 10%, with precision and recall of 51% and 35%, respectively. YOLOv8 achieved the strongest overall performance. Florence-2 and PaLI-Gemma underperformed relative to YOLOv8 despite their potential for multimodal contextual understanding, highlighting the need for larger, more diverse datasets and hybrid architectures to achieve improved performance.</p>
	]]></content:encoded>

	<dc:title>Comparative Evaluation of Vision–Language Models for Detecting and Localizing Dental Lesions from Intraoral Images</dc:title>
			<dc:creator>Maria Jahan</dc:creator>
			<dc:creator>Al Ibne Siam</dc:creator>
			<dc:creator>Lamim Zakir Pronay</dc:creator>
			<dc:creator>Saif Ahmed</dc:creator>
			<dc:creator>Nabeel Mohammed</dc:creator>
			<dc:creator>James Dudley</dc:creator>
			<dc:creator>Taseef Hasan Farook</dc:creator>
		<dc:identifier>doi: 10.3390/jimaging12010022</dc:identifier>
	<dc:source>Journal of Imaging</dc:source>
	<dc:date>2026-01-03</dc:date>

	<prism:publicationName>Journal of Imaging</prism:publicationName>
	<prism:publicationDate>2026-01-03</prism:publicationDate>
	<prism:volume>12</prism:volume>
	<prism:number>1</prism:number>
	<prism:section>Article</prism:section>
	<prism:startingPage>22</prism:startingPage>
		<prism:doi>10.3390/jimaging12010022</prism:doi>
	<prism:url>https://www.mdpi.com/2313-433X/12/1/22</prism:url>
	
	<cc:license rdf:resource="CC BY 4.0"/>
</item>
        <item rdf:about="https://www.mdpi.com/2313-433X/12/1/21">

	<title>J. Imaging, Vol. 12, Pages 21: Multi-Temporal Shoreline Monitoring and Analysis in Bangkok Bay, Thailand, Using Remote Sensing and GIS Techniques</title>
	<link>https://www.mdpi.com/2313-433X/12/1/21</link>
	<description>Drastic alterations have been observed in the coastline of Bangkok Bay, Thailand, over the past three decades. Understanding how coastlines change plays a key role in developing strategies for coastal protection and sustainable resource utilization. This study investigates the temporal and spatial changes in the Bangkok Bay coastline, Thailand, using remote sensing and GIS techniques from 1989 to 2024. The historical rate of coastline change for a typical segment was analyzed using the EPR method, and the underlying causes of these changes were discussed. Finally, the variation trend of the total shoreline length and the characteristics of erosion and sedimentation for a typical shoreline in Bangkok Bay, Thailand, over the past 35 years were obtained. An overall increase in coastline length was observed in Bangkok Bay, Thailand, over the 35-year period from 1989 to 2024, with a net gain from 507.23 km to 571.38 km. The rate of growth has transitioned from rapid to slow, with the most significant changes occurring during the period 1989–1994. Additionally, the average and maximum erosion rates for the typical shoreline segment were notably high during 1989–1994, with values of −21.61 m/a and −55.49 m/a, respectively. The maximum sedimentation rate along the coastline was relatively high from 2014 to 2024, reaching 10.57 m/a. Overall, the entire coastline of the Samut Sakhon–Bangkok–Samut Prakan Provinces underwent net erosion from 1989 to 2024, driven by a confluence of natural and anthropogenic factors.</description>
	<pubDate>2026-01-01</pubDate>

	<content:encoded><![CDATA[
	<p><b>J. Imaging, Vol. 12, Pages 21: Multi-Temporal Shoreline Monitoring and Analysis in Bangkok Bay, Thailand, Using Remote Sensing and GIS Techniques</b></p>
	<p>Journal of Imaging <a href="https://www.mdpi.com/2313-433X/12/1/21">doi: 10.3390/jimaging12010021</a></p>
	<p>Authors:
		Yan Wang
		Adisorn Sirikham
		Jessada Konpang
		Chunguang Li
		</p>
	<p>Drastic alterations have been observed in the coastline of Bangkok Bay, Thailand, over the past three decades. Understanding how coastlines change plays a key role in developing strategies for coastal protection and sustainable resource utilization. This study investigates the temporal and spatial changes in the Bangkok Bay coastline, Thailand, using remote sensing and GIS techniques from 1989 to 2024. The historical rate of coastline change for a typical segment was analyzed using the EPR method, and the underlying causes of these changes were discussed. Finally, the variation trend of the total shoreline length and the characteristics of erosion and sedimentation for a typical shoreline in Bangkok Bay, Thailand, over the past 35 years were obtained. An overall increase in coastline length was observed in Bangkok Bay, Thailand, over the 35-year period from 1989 to 2024, with a net gain from 507.23 km to 571.38 km. The rate of growth has transitioned from rapid to slow, with the most significant changes occurring during the period 1989–1994. Additionally, the average and maximum erosion rates for the typical shoreline segment were notably high during 1989–1994, with values of −21.61 m/a and −55.49 m/a, respectively. The maximum sedimentation rate along the coastline was relatively high from 2014 to 2024, reaching 10.57 m/a. Overall, the entire coastline of the Samut Sakhon–Bangkok–Samut Prakan Provinces underwent net erosion from 1989 to 2024, driven by a confluence of natural and anthropogenic factors.</p>
	]]></content:encoded>

	<dc:title>Multi-Temporal Shoreline Monitoring and Analysis in Bangkok Bay, Thailand, Using Remote Sensing and GIS Techniques</dc:title>
			<dc:creator>Yan Wang</dc:creator>
			<dc:creator>Adisorn Sirikham</dc:creator>
			<dc:creator>Jessada Konpang</dc:creator>
			<dc:creator>Chunguang Li</dc:creator>
		<dc:identifier>doi: 10.3390/jimaging12010021</dc:identifier>
	<dc:source>Journal of Imaging</dc:source>
	<dc:date>2026-01-01</dc:date>

	<prism:publicationName>Journal of Imaging</prism:publicationName>
	<prism:publicationDate>2026-01-01</prism:publicationDate>
	<prism:volume>12</prism:volume>
	<prism:number>1</prism:number>
	<prism:section>Article</prism:section>
	<prism:startingPage>21</prism:startingPage>
		<prism:doi>10.3390/jimaging12010021</prism:doi>
	<prism:url>https://www.mdpi.com/2313-433X/12/1/21</prism:url>
	
	<cc:license rdf:resource="CC BY 4.0"/>
</item>
        <item rdf:about="https://www.mdpi.com/2313-433X/12/1/20">

	<title>J. Imaging, Vol. 12, Pages 20: Object Detection on Road: Vehicle’s Detection Based on Re-Training Models on NVIDIA-Jetson Platform</title>
	<link>https://www.mdpi.com/2313-433X/12/1/20</link>
	<description>The increasing use of artificial intelligence (AI) and deep learning (DL) techniques has driven advances in vehicle classification and detection applications for embedded devices with deployment constraints due to computational cost and response time. In the case of urban environments with high traffic congestion, such as the city of Lima, it is important to determine the trade-off between model accuracy, type of embedded system, and the dataset used. This study was developed using a methodology adapted from the CRISP-DM approach, which included the acquisition of traffic videos in the city of Lima, their segmentation, and manual labeling. Subsequently, three SSD-based detection models (MobileNetV1-SSD, MobileNetV2-SSD-Lite, and VGG16-SSD) were trained on the NVIDIA Jetson Orin NX 16 GB platform. The results show that the VGG16-SSD model achieved the highest average precision (mAP ≈90.7%), with a longer training time, while the MobileNetV1-SSD (512×512) model achieved comparable performance (mAP ≈90.4%) with a shorter time. Additionally, data augmentation through contrast adjustment improved the detection of minority classes such as Tuk-tuk and Motorcycle. The results indicate that, among the evaluated models, MobileNetV1-SSD (512×512) achieved the best balance between accuracy and computational load for its implementation in ADAS embedded systems in congested urban environments.</description>
	<pubDate>2026-01-01</pubDate>

	<content:encoded><![CDATA[
	<p><b>J. Imaging, Vol. 12, Pages 20: Object Detection on Road: Vehicle’s Detection Based on Re-Training Models on NVIDIA-Jetson Platform</b></p>
	<p>Journal of Imaging <a href="https://www.mdpi.com/2313-433X/12/1/20">doi: 10.3390/jimaging12010020</a></p>
	<p>Authors:
		Sleiter Ramos-Sanchez
		Jinmi Lezama
		Ricardo Yauri
		Joyce Zevallos
		</p>
	<p>The increasing use of artificial intelligence (AI) and deep learning (DL) techniques has driven advances in vehicle classification and detection applications for embedded devices with deployment constraints due to computational cost and response time. In the case of urban environments with high traffic congestion, such as the city of Lima, it is important to determine the trade-off between model accuracy, type of embedded system, and the dataset used. This study was developed using a methodology adapted from the CRISP-DM approach, which included the acquisition of traffic videos in the city of Lima, their segmentation, and manual labeling. Subsequently, three SSD-based detection models (MobileNetV1-SSD, MobileNetV2-SSD-Lite, and VGG16-SSD) were trained on the NVIDIA Jetson Orin NX 16 GB platform. The results show that the VGG16-SSD model achieved the highest average precision (mAP ≈90.7%), with a longer training time, while the MobileNetV1-SSD (512×512) model achieved comparable performance (mAP ≈90.4%) with a shorter time. Additionally, data augmentation through contrast adjustment improved the detection of minority classes such as Tuk-tuk and Motorcycle. The results indicate that, among the evaluated models, MobileNetV1-SSD (512×512) achieved the best balance between accuracy and computational load for its implementation in ADAS embedded systems in congested urban environments.</p>
	]]></content:encoded>

	<dc:title>Object Detection on Road: Vehicle’s Detection Based on Re-Training Models on NVIDIA-Jetson Platform</dc:title>
			<dc:creator>Sleiter Ramos-Sanchez</dc:creator>
			<dc:creator>Jinmi Lezama</dc:creator>
			<dc:creator>Ricardo Yauri</dc:creator>
			<dc:creator>Joyce Zevallos</dc:creator>
		<dc:identifier>doi: 10.3390/jimaging12010020</dc:identifier>
	<dc:source>Journal of Imaging</dc:source>
	<dc:date>2026-01-01</dc:date>

	<prism:publicationName>Journal of Imaging</prism:publicationName>
	<prism:publicationDate>2026-01-01</prism:publicationDate>
	<prism:volume>12</prism:volume>
	<prism:number>1</prism:number>
	<prism:section>Article</prism:section>
	<prism:startingPage>20</prism:startingPage>
		<prism:doi>10.3390/jimaging12010020</prism:doi>
	<prism:url>https://www.mdpi.com/2313-433X/12/1/20</prism:url>
	
	<cc:license rdf:resource="CC BY 4.0"/>
</item>
        <item rdf:about="https://www.mdpi.com/2313-433X/12/1/19">

	<title>J. Imaging, Vol. 12, Pages 19: Double-Gated Mamba Multi-Scale Adaptive Feature Learning Network for Unsupervised Single RGB Image Hyperspectral Image Reconstruction</title>
	<link>https://www.mdpi.com/2313-433X/12/1/19</link>
	<description>Existing methods for reconstructing hyperspectral images from single RGB images are hampered by the difficulty of obtaining large numbers of labeled RGB-HSI image pairs. These methods also face issues such as detail loss, insufficient robustness, low reconstruction accuracy, and the difficulty of balancing the spatial–spectral trade-off. To address these challenges, a Double-Gated Mamba Multi-Scale Adaptive Feature (DMMAF) learning network model is proposed. DMMAF designs a reflection dot-product adaptive dual-noise-aware feature extraction method, which is used to supplement edge detail information in spectral images and improve robustness. DMMAF also constructs a deformable attention-based global feature extraction method and a double-gated Mamba local feature extraction approach, enhancing the interaction between local and global information during the reconstruction process and thereby improving image accuracy. Meanwhile, DMMAF introduces a structure-aware smooth loss function, which, by combining smoothing, curvature, and attention supervision losses, effectively resolves the spatial–spectral resolution balance problem. Experiments on three datasets—NTIRE 2020, Harvard, and CAVE—demonstrate that this model achieves state-of-the-art unsupervised reconstruction performance compared with existing advanced algorithms. On the NTIRE 2020 dataset, our method attains MRAE, RMSE, and PSNR values of 0.133, 0.040, and 31.314, respectively. On the Harvard dataset, it achieves RMSE and PSNR values of 0.025 and 34.955, respectively, while on the CAVE dataset, it achieves RMSE and PSNR values of 0.041 and 30.983, respectively.</description>
	<pubDate>2025-12-31</pubDate>

	<content:encoded><![CDATA[
	<p><b>J. Imaging, Vol. 12, Pages 19: Double-Gated Mamba Multi-Scale Adaptive Feature Learning Network for Unsupervised Single RGB Image Hyperspectral Image Reconstruction</b></p>
	<p>Journal of Imaging <a href="https://www.mdpi.com/2313-433X/12/1/19">doi: 10.3390/jimaging12010019</a></p>
	<p>Authors:
		Zhongmin Jiang
		Zhen Wang
		Wenju Wang
		Jifan Zhu
		</p>
	<p>Existing methods for reconstructing hyperspectral images from single RGB images are hampered by the difficulty of obtaining large numbers of labeled RGB-HSI image pairs. These methods also face issues such as detail loss, insufficient robustness, low reconstruction accuracy, and the difficulty of balancing the spatial–spectral trade-off. To address these challenges, a Double-Gated Mamba Multi-Scale Adaptive Feature (DMMAF) learning network model is proposed. DMMAF designs a reflection dot-product adaptive dual-noise-aware feature extraction method, which is used to supplement edge detail information in spectral images and improve robustness. DMMAF also constructs a deformable attention-based global feature extraction method and a double-gated Mamba local feature extraction approach, enhancing the interaction between local and global information during the reconstruction process and thereby improving image accuracy. Meanwhile, DMMAF introduces a structure-aware smooth loss function, which, by combining smoothing, curvature, and attention supervision losses, effectively resolves the spatial–spectral resolution balance problem. Experiments on three datasets—NTIRE 2020, Harvard, and CAVE—demonstrate that this model achieves state-of-the-art unsupervised reconstruction performance compared with existing advanced algorithms. On the NTIRE 2020 dataset, our method attains MRAE, RMSE, and PSNR values of 0.133, 0.040, and 31.314, respectively. On the Harvard dataset, it achieves RMSE and PSNR values of 0.025 and 34.955, respectively, while on the CAVE dataset, it achieves RMSE and PSNR values of 0.041 and 30.983, respectively.</p>
	]]></content:encoded>

	<dc:title>Double-Gated Mamba Multi-Scale Adaptive Feature Learning Network for Unsupervised Single RGB Image Hyperspectral Image Reconstruction</dc:title>
			<dc:creator>Zhongmin Jiang</dc:creator>
			<dc:creator>Zhen Wang</dc:creator>
			<dc:creator>Wenju Wang</dc:creator>
			<dc:creator>Jifan Zhu</dc:creator>
		<dc:identifier>doi: 10.3390/jimaging12010019</dc:identifier>
	<dc:source>Journal of Imaging</dc:source>
	<dc:date>2025-12-31</dc:date>

	<prism:publicationName>Journal of Imaging</prism:publicationName>
	<prism:publicationDate>2025-12-31</prism:publicationDate>
	<prism:volume>12</prism:volume>
	<prism:number>1</prism:number>
	<prism:section>Article</prism:section>
	<prism:startingPage>19</prism:startingPage>
		<prism:doi>10.3390/jimaging12010019</prism:doi>
	<prism:url>https://www.mdpi.com/2313-433X/12/1/19</prism:url>
	
	<cc:license rdf:resource="CC BY 4.0"/>
</item>
        <item rdf:about="https://www.mdpi.com/2313-433X/12/1/18">

	<title>J. Imaging, Vol. 12, Pages 18: Revisiting Underwater Image Enhancement for Object Detection: A Unified Quality&amp;ndash;Detection Evaluation Framework</title>
	<link>https://www.mdpi.com/2313-433X/12/1/18</link>
	<description>Underwater images often suffer from severe color distortion, low contrast, and reduced visibility, motivating the widespread use of image enhancement as a preprocessing step for downstream computer vision tasks. However, recent studies have questioned whether enhancement actually improves object detection performance. In this work, we conduct a comprehensive and rigorous evaluation of nine state-of-the-art enhancement methods and their interactions with modern object detectors. We propose a unified evaluation framework that integrates (1) a distribution-level quality assessment using a composite quality index (Q-index), (2) a fine-grained per-image detection protocol based on COCO-style mAP, and (3) a mixed-set upper-bound analysis that quantifies the theoretical performance achievable through ideal selective enhancement. Our findings reveal that traditional image quality metrics do not reliably predict detection performance, and that dataset-level conclusions often overlook substantial image-level variability. Through per-image evaluation, we identify numerous cases in which enhancement significantly improves detection accuracy&amp;mdash;primarily for low-quality inputs&amp;mdash;while also demonstrating conditions under which enhancement degrades performance. The mixed-set analysis shows that selective enhancement can yield substantial gains over both original and fully enhanced datasets, establishing a new direction for designing enhancement models optimized for downstream vision tasks. This study provides the most comprehensive evidence to date that underwater image enhancement can be beneficial for object detection when evaluated at the appropriate granularity and guided by informed selection strategies. The data generated and code developed are publicly available.</description>
	<pubDate>2025-12-30</pubDate>

	<content:encoded><![CDATA[
	<p><b>J. Imaging, Vol. 12, Pages 18: Revisiting Underwater Image Enhancement for Object Detection: A Unified Quality&amp;ndash;Detection Evaluation Framework</b></p>
	<p>Journal of Imaging <a href="https://www.mdpi.com/2313-433X/12/1/18">doi: 10.3390/jimaging12010018</a></p>
	<p>Authors:
		Ali Awad
		Ashraf Saleem
		Sidike Paheding
		Evan Lucas
		Serein Al-Ratrout
		Timothy C. Havens
		</p>
	<p>Underwater images often suffer from severe color distortion, low contrast, and reduced visibility, motivating the widespread use of image enhancement as a preprocessing step for downstream computer vision tasks. However, recent studies have questioned whether enhancement actually improves object detection performance. In this work, we conduct a comprehensive and rigorous evaluation of nine state-of-the-art enhancement methods and their interactions with modern object detectors. We propose a unified evaluation framework that integrates (1) a distribution-level quality assessment using a composite quality index (Q-index), (2) a fine-grained per-image detection protocol based on COCO-style mAP, and (3) a mixed-set upper-bound analysis that quantifies the theoretical performance achievable through ideal selective enhancement. Our findings reveal that traditional image quality metrics do not reliably predict detection performance, and that dataset-level conclusions often overlook substantial image-level variability. Through per-image evaluation, we identify numerous cases in which enhancement significantly improves detection accuracy&amp;mdash;primarily for low-quality inputs&amp;mdash;while also demonstrating conditions under which enhancement degrades performance. The mixed-set analysis shows that selective enhancement can yield substantial gains over both original and fully enhanced datasets, establishing a new direction for designing enhancement models optimized for downstream vision tasks. This study provides the most comprehensive evidence to date that underwater image enhancement can be beneficial for object detection when evaluated at the appropriate granularity and guided by informed selection strategies. The data generated and code developed are publicly available.</p>
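	<p>The mixed-set upper-bound idea described above can be illustrated with a small Python sketch: an oracle that, for every image, keeps whichever of the original or enhanced version gives the higher per-image AP. This is an editor-added example, not the paper's code, and the per-image AP values are placeholders.</p>
	<pre><code>
# A minimal sketch, under assumed data structures, of a mixed-set
# upper-bound analysis: for every image, keep whichever version
# (original or enhanced) yields the higher per-image detection score.
# The per-image AP values here are placeholders, not results from the paper.

def mixed_set_upper_bound(ap_original, ap_enhanced):
    """Given per-image AP lists for original and enhanced images, return the
    mean AP of an oracle that picks the better version of each image."""
    assert len(ap_original) == len(ap_enhanced)
    best = [max(o, e) for o, e in zip(ap_original, ap_enhanced)]
    return sum(best) / len(best)

# Hypothetical per-image AP values for illustration only.
ap_original = [0.62, 0.40, 0.75, 0.10, 0.55]
ap_enhanced = [0.58, 0.52, 0.70, 0.35, 0.60]

print("original mean AP:", sum(ap_original) / len(ap_original))
print("enhanced mean AP:", sum(ap_enhanced) / len(ap_enhanced))
print("oracle mixed-set mean AP:", mixed_set_upper_bound(ap_original, ap_enhanced))
	</code></pre>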
	]]></content:encoded>

	<dc:title>Revisiting Underwater Image Enhancement for Object Detection: A Unified Quality&amp;ndash;Detection Evaluation Framework</dc:title>
			<dc:creator>Ali Awad</dc:creator>
			<dc:creator>Ashraf Saleem</dc:creator>
			<dc:creator>Sidike Paheding</dc:creator>
			<dc:creator>Evan Lucas</dc:creator>
			<dc:creator>Serein Al-Ratrout</dc:creator>
			<dc:creator>Timothy C. Havens</dc:creator>
		<dc:identifier>doi: 10.3390/jimaging12010018</dc:identifier>
	<dc:source>Journal of Imaging</dc:source>
	<dc:date>2025-12-30</dc:date>

	<prism:publicationName>Journal of Imaging</prism:publicationName>
	<prism:publicationDate>2025-12-30</prism:publicationDate>
	<prism:volume>12</prism:volume>
	<prism:number>1</prism:number>
	<prism:section>Article</prism:section>
	<prism:startingPage>18</prism:startingPage>
		<prism:doi>10.3390/jimaging12010018</prism:doi>
	<prism:url>https://www.mdpi.com/2313-433X/12/1/18</prism:url>
	
	<cc:license rdf:resource="CC BY 4.0"/>
</item>
        <item rdf:about="https://www.mdpi.com/2313-433X/12/1/17">

	<title>J. Imaging, Vol. 12, Pages 17: Advancing Medical Decision-Making with AI: A Comprehensive Exploration of the Evolution from Convolutional Neural Networks to Capsule Networks</title>
	<link>https://www.mdpi.com/2313-433X/12/1/17</link>
	<description>In this paper, we present a literature review of two deep learning architectures, Convolutional Neural Networks (CNNs) and Capsule Networks (CapsNets), applied to medical images in support of medical decision-making. CNNs have demonstrated their capacity in the medical diagnostic field; however, their reliability decreases under slight spatial variability, which can affect diagnosis, especially since the anatomical structure of the human body can differ from one patient to another. In contrast, CapsNets encode not only feature activations but also spatial relationships, thereby improving the reliability and stability of model generalization. The paper provides a structured comparison by reviewing studies published from 2018 to 2025 across major databases, including IEEE Xplore, ScienceDirect, SpringerLink, and MDPI. The applications in the reviewed papers are based on the benchmark datasets BraTS, INbreast, ISIC, and COVIDx. The review compares the core architectural principles, performance, and interpretability of both architectures. We conclude by underlining the complementary roles of these two architectures in medical decision-making and by proposing future directions toward hybrid, explainable, and computationally efficient deep learning systems for real clinical environments, supporting earlier disease detection and, ultimately, improved survival rates.</description>
	<pubDate>2025-12-30</pubDate>

	<content:encoded><![CDATA[
	<p><b>J. Imaging, Vol. 12, Pages 17: Advancing Medical Decision-Making with AI: A Comprehensive Exploration of the Evolution from Convolutional Neural Networks to Capsule Networks</b></p>
	<p>Journal of Imaging <a href="https://www.mdpi.com/2313-433X/12/1/17">doi: 10.3390/jimaging12010017</a></p>
	<p>Authors:
		Ichrak Khoulqi
		Zakariae El Ouazzani
		</p>
	<p>In this paper, we present a literature review of two deep learning architectures, Convolutional Neural Networks (CNNs) and Capsule Networks (CapsNets), applied to medical images in support of medical decision-making. CNNs have demonstrated their capacity in the medical diagnostic field; however, their reliability decreases under slight spatial variability, which can affect diagnosis, especially since the anatomical structure of the human body can differ from one patient to another. In contrast, CapsNets encode not only feature activations but also spatial relationships, thereby improving the reliability and stability of model generalization. The paper provides a structured comparison by reviewing studies published from 2018 to 2025 across major databases, including IEEE Xplore, ScienceDirect, SpringerLink, and MDPI. The applications in the reviewed papers are based on the benchmark datasets BraTS, INbreast, ISIC, and COVIDx. The review compares the core architectural principles, performance, and interpretability of both architectures. We conclude by underlining the complementary roles of these two architectures in medical decision-making and by proposing future directions toward hybrid, explainable, and computationally efficient deep learning systems for real clinical environments, supporting earlier disease detection and, ultimately, improved survival rates.</p>
	]]></content:encoded>

	<dc:title>Advancing Medical Decision-Making with AI: A Comprehensive Exploration of the Evolution from Convolutional Neural Networks to Capsule Networks</dc:title>
			<dc:creator>Ichrak Khoulqi</dc:creator>
			<dc:creator>Zakariae El Ouazzani</dc:creator>
		<dc:identifier>doi: 10.3390/jimaging12010017</dc:identifier>
	<dc:source>Journal of Imaging</dc:source>
	<dc:date>2025-12-30</dc:date>

	<prism:publicationName>Journal of Imaging</prism:publicationName>
	<prism:publicationDate>2025-12-30</prism:publicationDate>
	<prism:volume>12</prism:volume>
	<prism:number>1</prism:number>
	<prism:section>Review</prism:section>
	<prism:startingPage>17</prism:startingPage>
		<prism:doi>10.3390/jimaging12010017</prism:doi>
	<prism:url>https://www.mdpi.com/2313-433X/12/1/17</prism:url>
	
	<cc:license rdf:resource="CC BY 4.0"/>
</item>
        <item rdf:about="https://www.mdpi.com/2313-433X/12/1/16">

	<title>J. Imaging, Vol. 12, Pages 16: FluoNeRF: Fluorescent Novel-View Synthesis Under Novel Light Source Colors and Spectra</title>
	<link>https://www.mdpi.com/2313-433X/12/1/16</link>
	<description>Synthesizing photo-realistic images of a scene from arbitrary viewpoints and under arbitrary lighting environments is an important research topic in computer vision and graphics. In this paper, we propose a method for synthesizing photo-realistic images of a scene with fluorescent objects from novel viewpoints and under novel lighting colors and spectra. In general, fluorescent materials absorb light at certain wavelengths and then emit light at longer wavelengths, in contrast to reflective materials, which preserve the wavelengths of incident light. Therefore, the colors of fluorescent objects under arbitrary lighting colors cannot be reproduced by combining conventional view synthesis techniques with white balance adjustment of the RGB channels. Accordingly, we extend novel-view synthesis based on neural radiance fields by incorporating the superposition principle of light; our proposed method captures a sparse set of images of a scene from varying viewpoints and under varying lighting colors or spectra with active lighting systems, such as a color display or a multi-spectral light stage, and then synthesizes photo-realistic images of the scene without explicitly modeling its geometric and photometric properties. We conducted a number of experiments using real images captured with an LCD and confirmed that our method outperforms existing methods. Moreover, we showed that extending our method to more than three primary colors with a light stage enables us to reproduce the colors of fluorescent objects under common light sources.</description>
	<pubDate>2025-12-29</pubDate>

	<content:encoded><![CDATA[
	<p><b>J. Imaging, Vol. 12, Pages 16: FluoNeRF: Fluorescent Novel-View Synthesis Under Novel Light Source Colors and Spectra</b></p>
	<p>Journal of Imaging <a href="https://www.mdpi.com/2313-433X/12/1/16">doi: 10.3390/jimaging12010016</a></p>
	<p>Authors:
		Lin Shi
		Kengo Matsufuji
		Michitaka Yoshida
		Ryo Kawahara
		Takahiro Okabe
		</p>
	<p>Synthesizing photo-realistic images of a scene from arbitrary viewpoints and under arbitrary lighting environments is an important research topic in computer vision and graphics. In this paper, we propose a method for synthesizing photo-realistic images of a scene with fluorescent objects from novel viewpoints and under novel lighting colors and spectra. In general, fluorescent materials absorb light at certain wavelengths and then emit light at longer wavelengths, in contrast to reflective materials, which preserve the wavelengths of incident light. Therefore, the colors of fluorescent objects under arbitrary lighting colors cannot be reproduced by combining conventional view synthesis techniques with white balance adjustment of the RGB channels. Accordingly, we extend novel-view synthesis based on neural radiance fields by incorporating the superposition principle of light; our proposed method captures a sparse set of images of a scene from varying viewpoints and under varying lighting colors or spectra with active lighting systems, such as a color display or a multi-spectral light stage, and then synthesizes photo-realistic images of the scene without explicitly modeling its geometric and photometric properties. We conducted a number of experiments using real images captured with an LCD and confirmed that our method outperforms existing methods. Moreover, we showed that extending our method to more than three primary colors with a light stage enables us to reproduce the colors of fluorescent objects under common light sources.</p>
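	<p>The superposition principle mentioned above can be sketched in a few lines of Python: because light transport is linear, an image under a novel light can be approximated as a weighted sum of images captured under basis lights. This editor-added sketch is not the authors' implementation; the shapes and weights are assumptions.</p>
	<pre><code>
# A minimal sketch of the superposition principle the method builds on:
# because light transport is linear, an image under a novel light source
# can be written as a weighted sum of images captured under basis lights
# (e.g., the display primaries). Shapes and weights are assumptions.
import numpy as np

def relight(basis_images, weights):
    """basis_images: array of shape (n_lights, H, W, 3), one image per basis light.
    weights: length-n_lights coefficients describing the novel light as a
    combination of the basis lights. Returns the synthesized image."""
    basis_images = np.asarray(basis_images, dtype=np.float64)
    weights = np.asarray(weights, dtype=np.float64)
    return np.tensordot(weights, basis_images, axes=1)

# Hypothetical captures under three basis lights (e.g., R, G, B primaries).
rng = np.random.default_rng(0)
basis = rng.random((3, 32, 32, 3))
novel_light_weights = [0.2, 0.5, 0.3]
synthesized = relight(basis, novel_light_weights)
print(synthesized.shape)
	</code></pre>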
	]]></content:encoded>

	<dc:title>FluoNeRF: Fluorescent Novel-View Synthesis Under Novel Light Source Colors and Spectra</dc:title>
			<dc:creator>Lin Shi</dc:creator>
			<dc:creator>Kengo Matsufuji</dc:creator>
			<dc:creator>Michitaka Yoshida</dc:creator>
			<dc:creator>Ryo Kawahara</dc:creator>
			<dc:creator>Takahiro Okabe</dc:creator>
		<dc:identifier>doi: 10.3390/jimaging12010016</dc:identifier>
	<dc:source>Journal of Imaging</dc:source>
	<dc:date>2025-12-29</dc:date>

	<prism:publicationName>Journal of Imaging</prism:publicationName>
	<prism:publicationDate>2025-12-29</prism:publicationDate>
	<prism:volume>12</prism:volume>
	<prism:number>1</prism:number>
	<prism:section>Article</prism:section>
	<prism:startingPage>16</prism:startingPage>
		<prism:doi>10.3390/jimaging12010016</prism:doi>
	<prism:url>https://www.mdpi.com/2313-433X/12/1/16</prism:url>
	
	<cc:license rdf:resource="CC BY 4.0"/>
</item>
        <item rdf:about="https://www.mdpi.com/2313-433X/12/1/15">

	<title>J. Imaging, Vol. 12, Pages 15: M3-TransUNet: Medical Image Segmentation Based on Spatial Prior Attention and Multi-Scale Gating</title>
	<link>https://www.mdpi.com/2313-433X/12/1/15</link>
	<description>Medical image segmentation presents substantial challenges arising from the diverse scales and morphological complexities of target anatomical structures. Although existing Transformer-based models excel at capturing global dependencies, they encounter critical bottlenecks in multi-scale feature representation, spatial relationship modeling, and cross-layer feature fusion. To address these limitations, we propose the M3-TransUNet architecture, which incorporates three key innovations: (1) MSGA (Multi-Scale Gate Attention) and MSSA (Multi-Scale Selective Attention) modules to enhance multi-scale feature representation; (2) ME-MSA (Manhattan Enhanced Multi-Head Self-Attention) to integrate spatial priors into self-attention computations, thereby overcoming spatial modeling deficiencies; and (3) MKGAG (Multi-kernel Gated Attention Gate) to optimize skip connections by precisely filtering noise and preserving boundary details. Extensive experiments on public datasets&amp;mdash;including Synapse, CVC-ClinicDB, and ISIC&amp;mdash;demonstrate that M3-TransUNet achieves state-of-the-art performance. Specifically, on the Synapse dataset, our model outperforms recent TransUNet variants such as J-CAPA, improving the average DSC to 82.79% (compared to 82.29%) and significantly reducing the average HD95 from 19.74 mm to 10.21 mm.</description>
	<pubDate>2025-12-29</pubDate>

	<content:encoded><![CDATA[
	<p><b>J. Imaging, Vol. 12, Pages 15: M3-TransUNet: Medical Image Segmentation Based on Spatial Prior Attention and Multi-Scale Gating</b></p>
	<p>Journal of Imaging <a href="https://www.mdpi.com/2313-433X/12/1/15">doi: 10.3390/jimaging12010015</a></p>
	<p>Authors:
		Zhigao Zeng
		Jiale Xiao
		Shengqiu Yi
		Qiang Liu
		Yanhui Zhu
		</p>
	<p>Medical image segmentation presents substantial challenges arising from the diverse scales and morphological complexities of target anatomical structures. Although existing Transformer-based models excel at capturing global dependencies, they encounter critical bottlenecks in multi-scale feature representation, spatial relationship modeling, and cross-layer feature fusion. To address these limitations, we propose the M3-TransUNet architecture, which incorporates three key innovations: (1) MSGA (Multi-Scale Gate Attention) and MSSA (Multi-Scale Selective Attention) modules to enhance multi-scale feature representation; (2) ME-MSA (Manhattan Enhanced Multi-Head Self-Attention) to integrate spatial priors into self-attention computations, thereby overcoming spatial modeling deficiencies; and (3) MKGAG (Multi-kernel Gated Attention Gate) to optimize skip connections by precisely filtering noise and preserving boundary details. Extensive experiments on public datasets&amp;mdash;including Synapse, CVC-ClinicDB, and ISIC&amp;mdash;demonstrate that M3-TransUNet achieves state-of-the-art performance. Specifically, on the Synapse dataset, our model outperforms recent TransUNet variants such as J-CAPA, improving the average DSC to 82.79% (compared to 82.29%) and significantly reducing the average HD95 from 19.74 mm to 10.21 mm.</p>
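	<p>For reference, the Python sketch below shows the standard Dice similarity coefficient (DSC) used in the results above; HD95, the 95th percentile of symmetric boundary distances, typically requires distance transforms and is not implemented here. This is an editor-added illustration, not the authors' evaluation code, and the masks are placeholders.</p>
	<pre><code>
# A minimal sketch, with assumed array conventions, of the Dice similarity
# coefficient (DSC) reported above. The epsilon term avoids division by zero
# when both masks are empty.
import numpy as np

def dice(pred_mask, gt_mask, eps=1e-7):
    """Binary masks as boolean or 0/1 arrays of the same shape."""
    pred = np.asarray(pred_mask).astype(bool)
    gt = np.asarray(gt_mask).astype(bool)
    intersection = np.logical_and(pred, gt).sum()
    return float((2.0 * intersection + eps) / (pred.sum() + gt.sum() + eps))

# Hypothetical masks for illustration only.
pred = np.zeros((64, 64), dtype=bool)
pred[10:40, 10:40] = True
gt = np.zeros((64, 64), dtype=bool)
gt[15:45, 15:45] = True
print("DSC:", dice(pred, gt))
	</code></pre>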
	]]></content:encoded>

	<dc:title>M3-TransUNet: Medical Image Segmentation Based on Spatial Prior Attention and Multi-Scale Gating</dc:title>
			<dc:creator>Zhigao Zeng</dc:creator>
			<dc:creator>Jiale Xiao</dc:creator>
			<dc:creator>Shengqiu Yi</dc:creator>
			<dc:creator>Qiang Liu</dc:creator>
			<dc:creator>Yanhui Zhu</dc:creator>
		<dc:identifier>doi: 10.3390/jimaging12010015</dc:identifier>
	<dc:source>Journal of Imaging</dc:source>
	<dc:date>2025-12-29</dc:date>

	<prism:publicationName>Journal of Imaging</prism:publicationName>
	<prism:publicationDate>2025-12-29</prism:publicationDate>
	<prism:volume>12</prism:volume>
	<prism:number>1</prism:number>
	<prism:section>Article</prism:section>
	<prism:startingPage>15</prism:startingPage>
		<prism:doi>10.3390/jimaging12010015</prism:doi>
	<prism:url>https://www.mdpi.com/2313-433X/12/1/15</prism:url>
	
	<cc:license rdf:resource="CC BY 4.0"/>
</item>
        <item rdf:about="https://www.mdpi.com/2313-433X/12/1/14">

	<title>J. Imaging, Vol. 12, Pages 14: Adaptive Normalization Enhances the Generalization of Deep Learning Model in Chest X-Ray Classification</title>
	<link>https://www.mdpi.com/2313-433X/12/1/14</link>
	<description>This study presents a controlled benchmarking analysis of min&amp;ndash;max scaling, Z-score normalization, and an adaptive preprocessing pipeline that combines percentile-based ROI cropping with histogram standardization. The evaluation was conducted across four public chest X-ray (CXR) datasets and three convolutional neural network architectures under controlled experimental settings. The adaptive pipeline generally improved accuracy, F1-score, and training stability on datasets with relatively stable contrast characteristics while yielding limited gains on MIMIC-CXR due to strong acquisition heterogeneity. Ablation experiments showed that histogram standardization provided the primary performance contribution, with ROI cropping offering complementary benefits, and the full pipeline achieving the best overall performance. The computational overhead of the adaptive preprocessing was minimal (+6.3% training-time cost; 5.2 ms per batch). Friedman&amp;ndash;Nemenyi and Wilcoxon signed-rank tests confirmed that the observed improvements were statistically significant across most dataset&amp;ndash;model configurations. Overall, adaptive normalization is positioned not as a novel algorithmic contribution, but as a practical preprocessing design choice that can enhance cross-dataset robustness and reliability in chest X-ray classification workflows.</description>
	<pubDate>2025-12-28</pubDate>

	<content:encoded><![CDATA[
	<p><b>J. Imaging, Vol. 12, Pages 14: Adaptive Normalization Enhances the Generalization of Deep Learning Model in Chest X-Ray Classification</b></p>
	<p>Journal of Imaging <a href="https://www.mdpi.com/2313-433X/12/1/14">doi: 10.3390/jimaging12010014</a></p>
	<p>Authors:
		Jatsada Singthongchai
		Tanachapong Wangkhamhan
		</p>
	<p>This study presents a controlled benchmarking analysis of min&amp;ndash;max scaling, Z-score normalization, and an adaptive preprocessing pipeline that combines percentile-based ROI cropping with histogram standardization. The evaluation was conducted across four public chest X-ray (CXR) datasets and three convolutional neural network architectures under controlled experimental settings. The adaptive pipeline generally improved accuracy, F1-score, and training stability on datasets with relatively stable contrast characteristics while yielding limited gains on MIMIC-CXR due to strong acquisition heterogeneity. Ablation experiments showed that histogram standardization provided the primary performance contribution, with ROI cropping offering complementary benefits, and the full pipeline achieving the best overall performance. The computational overhead of the adaptive preprocessing was minimal (+6.3% training-time cost; 5.2 ms per batch). Friedman&amp;ndash;Nemenyi and Wilcoxon signed-rank tests confirmed that the observed improvements were statistically significant across most dataset&amp;ndash;model configurations. Overall, adaptive normalization is positioned not as a novel algorithmic contribution, but as a practical preprocessing design choice that can enhance cross-dataset robustness and reliability in chest X-ray classification workflows.</p>
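	<p>The three preprocessing variants compared above can be sketched in Python: min-max scaling, Z-score normalization, and a simple percentile-based ROI crop followed by histogram standardization. This editor-added sketch only approximates the described pipeline; the percentile values, clipping, and input image are assumptions.</p>
	<pre><code>
# A minimal sketch, under assumed parameter choices, of the three
# preprocessing variants compared above. Percentiles and clipping are
# assumptions, not the study's exact settings.
import numpy as np

def min_max(img, eps=1e-8):
    # Rescale intensities to [0, 1].
    return (img - img.min()) / (img.max() - img.min() + eps)

def z_score(img, eps=1e-8):
    # Zero-mean, unit-variance normalization.
    return (img - img.mean()) / (img.std() + eps)

def adaptive_pipeline(img, lo=2.0, hi=98.0):
    # Percentile-based ROI crop: bounding box of pixels above the low percentile.
    thresh = np.percentile(img, lo)
    mask = img > thresh
    rows = np.any(mask, axis=1)
    cols = np.any(mask, axis=0)
    r0, r1 = np.argmax(rows), len(rows) - np.argmax(rows[::-1])
    c0, c1 = np.argmax(cols), len(cols) - np.argmax(cols[::-1])
    roi = img[r0:r1, c0:c1]
    # Histogram standardization: clip to [lo, hi] percentiles, then rescale.
    p_lo, p_hi = np.percentile(roi, [lo, hi])
    roi = np.clip(roi, p_lo, p_hi)
    return min_max(roi)

# Hypothetical 12-bit chest X-ray for illustration only.
rng = np.random.default_rng(0)
cxr = rng.random((256, 256)) * 4095.0
for name, out in [("min-max", min_max(cxr)), ("z-score", z_score(cxr)),
                  ("adaptive", adaptive_pipeline(cxr))]:
    print(name, out.shape, round(float(out.mean()), 3))
	</code></pre>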
	]]></content:encoded>

	<dc:title>Adaptive Normalization Enhances the Generalization of Deep Learning Model in Chest X-Ray Classification</dc:title>
			<dc:creator>Jatsada Singthongchai</dc:creator>
			<dc:creator>Tanachapong Wangkhamhan</dc:creator>
		<dc:identifier>doi: 10.3390/jimaging12010014</dc:identifier>
	<dc:source>Journal of Imaging</dc:source>
	<dc:date>2025-12-28</dc:date>

	<prism:publicationName>Journal of Imaging</prism:publicationName>
	<prism:publicationDate>2025-12-28</prism:publicationDate>
	<prism:volume>12</prism:volume>
	<prism:number>1</prism:number>
	<prism:section>Article</prism:section>
	<prism:startingPage>14</prism:startingPage>
		<prism:doi>10.3390/jimaging12010014</prism:doi>
	<prism:url>https://www.mdpi.com/2313-433X/12/1/14</prism:url>
	
	<cc:license rdf:resource="CC BY 4.0"/>
</item>
        <item rdf:about="https://www.mdpi.com/2313-433X/12/1/13">

	<title>J. Imaging, Vol. 12, Pages 13: Assessing Change in Stone Burden on Baseline and Follow-Up CT: Radiologist and Radiomics Evaluations</title>
	<link>https://www.mdpi.com/2313-433X/12/1/13</link>
	<description>This retrospective diagnostic accuracy study compared radiologist-based qualitative assessments and radiomics-based analyses with an automated artificial intelligence (AI)&amp;ndash;based volumetric approach for evaluating changes in kidney stone burden on follow-up CT examinations. With institutional review board approval, 157 patients (mean age, 61 &amp;plusmn; 13 years; 99 men, 58 women) who underwent baseline and follow-up non-contrast abdomen&amp;ndash;pelvis CT for kidney stone evaluation were included. The index test was an automated AI-based whole-kidney and stone segmentation radiomics prototype (Frontier, Siemens Healthineers), which segmented both kidneys and isolated stone volumes using a fixed threshold of 130 Hounsfield units, providing stone volume and maximum diameter per kidney. The reference standard was a threshold-defined volumetric assessment of stone burden change between baseline and follow-up CTs. The radiologist&amp;rsquo;s performance was assessed using (1) interpretations from clinical radiology reports and (2) an independent radiologist&amp;rsquo;s assessment of stone burden change (stable, increased, or decreased). Diagnostic accuracy was evaluated using multivariable logistic regression and receiver operating characteristic (ROC) analysis. Automated volumetric assessment identified stable (n = 44), increased (n = 109), and decreased (n = 108) stone burden across the evaluated kidneys. Qualitative assessments from radiology reports demonstrated weak diagnostic performance (AUC range, 0.55&amp;ndash;0.62), similar to the independent radiologist (AUC range, 0.41&amp;ndash;0.72) for differentiating changes in stone burden. A model incorporating higher-order radiomics features achieved an AUC of 0.71 for distinguishing increased versus decreased stone burdens compared with the baseline CT (p &amp;lt; 0.001), but did not outperform threshold-based volumetric assessment. The automated threshold-based volumetric quantification of kidney stone burdens provides higher diagnostic accuracy than qualitative radiologist assessments and radiomics-based analyses for identifying a stable, increased, or decreased stone burden on follow-up CT examinations.</description>
	<pubDate>2025-12-27</pubDate>

	<content:encoded><![CDATA[
	<p><b>J. Imaging, Vol. 12, Pages 13: Assessing Change in Stone Burden on Baseline and Follow-Up CT: Radiologist and Radiomics Evaluations</b></p>
	<p>Journal of Imaging <a href="https://www.mdpi.com/2313-433X/12/1/13">doi: 10.3390/jimaging12010013</a></p>
	<p>Authors:
		Parisa Kaviani
		Matthias F. Froelich
		Bernardo Bizzo
		Andrew Primak
		Giridhar Dasegowda
		Emiliano Garza-Frias
		Lina Karout
		Anushree Burade
		Seyedehelaheh Hosseini
		Javier Eduardo Contreras Yametti
		Keith Dreyer
		Sanjay Saini
		Mannudeep Kalra
		</p>
	<p>This retrospective diagnostic accuracy study compared radiologist-based qualitative assessments and radiomics-based analyses with an automated artificial intelligence (AI)&amp;ndash;based volumetric approach for evaluating changes in kidney stone burden on follow-up CT examinations. With institutional review board approval, 157 patients (mean age, 61 &amp;plusmn; 13 years; 99 men, 58 women) who underwent baseline and follow-up non-contrast abdomen&amp;ndash;pelvis CT for kidney stone evaluation were included. The index test was an automated AI-based whole-kidney and stone segmentation radiomics prototype (Frontier, Siemens Healthineers), which segmented both kidneys and isolated stone volumes using a fixed threshold of 130 Hounsfield units, providing stone volume and maximum diameter per kidney. The reference standard was a threshold-defined volumetric assessment of stone burden change between baseline and follow-up CTs. The radiologist&amp;rsquo;s performance was assessed using (1) interpretations from clinical radiology reports and (2) an independent radiologist&amp;rsquo;s assessment of stone burden change (stable, increased, or decreased). Diagnostic accuracy was evaluated using multivariable logistic regression and receiver operating characteristic (ROC) analysis. Automated volumetric assessment identified stable (n = 44), increased (n = 109), and decreased (n = 108) stone burden across the evaluated kidneys. Qualitative assessments from radiology reports demonstrated weak diagnostic performance (AUC range, 0.55&amp;ndash;0.62), similar to the independent radiologist (AUC range, 0.41&amp;ndash;0.72) for differentiating changes in stone burden. A model incorporating higher-order radiomics features achieved an AUC of 0.71 for distinguishing increased versus decreased stone burdens compared with the baseline CT (p &amp;lt; 0.001), but did not outperform threshold-based volumetric assessment. The automated threshold-based volumetric quantification of kidney stone burdens provides higher diagnostic accuracy than qualitative radiologist assessments and radiomics-based analyses for identifying a stable, increased, or decreased stone burden on follow-up CT examinations.</p>
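	<p>The fixed 130-HU threshold described above lends itself to a short Python illustration: voxels inside the kidney mask with attenuation of at least 130 HU are counted as stone, and the count times the voxel volume gives the stone burden. This editor-added sketch is not the study's prototype; the arrays and voxel spacing are placeholders.</p>
	<pre><code>
# A minimal sketch, under assumed inputs, of fixed-threshold stone
# quantification: voxels inside the kidney mask with attenuation of at
# least 130 HU are treated as stone, and their count times the voxel
# volume gives the stone burden. Spacing and arrays are placeholders.
import numpy as np

def stone_volume_mm3(ct_hu, kidney_mask, spacing_mm, threshold_hu=130.0):
    """ct_hu: 3D array of Hounsfield units; kidney_mask: boolean 3D array;
    spacing_mm: (z, y, x) voxel spacing in millimetres."""
    stone_mask = np.logical_and(kidney_mask, ct_hu >= threshold_hu)
    voxel_volume = float(np.prod(spacing_mm))
    return stone_mask.sum() * voxel_volume

# Hypothetical volume with a small dense inclusion for illustration only.
ct = np.full((40, 64, 64), 30.0)
ct[18:22, 30:34, 30:34] = 400.0
kidney = np.zeros(ct.shape, dtype=bool)
kidney[10:30, 20:44, 20:44] = True
print("stone volume (mm^3):", stone_volume_mm3(ct, kidney, (2.0, 0.8, 0.8)))
	</code></pre>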
	]]></content:encoded>

	<dc:title>Assessing Change in Stone Burden on Baseline and Follow-Up CT: Radiologist and Radiomics Evaluations</dc:title>
			<dc:creator>Parisa Kaviani</dc:creator>
			<dc:creator>Matthias F. Froelich</dc:creator>
			<dc:creator>Bernardo Bizzo</dc:creator>
			<dc:creator>Andrew Primak</dc:creator>
			<dc:creator>Giridhar Dasegowda</dc:creator>
			<dc:creator>Emiliano Garza-Frias</dc:creator>
			<dc:creator>Lina Karout</dc:creator>
			<dc:creator>Anushree Burade</dc:creator>
			<dc:creator>Seyedehelaheh Hosseini</dc:creator>
			<dc:creator>Javier Eduardo Contreras Yametti</dc:creator>
			<dc:creator>Keith Dreyer</dc:creator>
			<dc:creator>Sanjay Saini</dc:creator>
			<dc:creator>Mannudeep Kalra</dc:creator>
		<dc:identifier>doi: 10.3390/jimaging12010013</dc:identifier>
	<dc:source>Journal of Imaging</dc:source>
	<dc:date>2025-12-27</dc:date>

	<prism:publicationName>Journal of Imaging</prism:publicationName>
	<prism:publicationDate>2025-12-27</prism:publicationDate>
	<prism:volume>12</prism:volume>
	<prism:number>1</prism:number>
	<prism:section>Article</prism:section>
	<prism:startingPage>13</prism:startingPage>
		<prism:doi>10.3390/jimaging12010013</prism:doi>
	<prism:url>https://www.mdpi.com/2313-433X/12/1/13</prism:url>
	
	<cc:license rdf:resource="CC BY 4.0"/>
</item>
    
<cc:License rdf:about="https://creativecommons.org/licenses/by/4.0/">
	<cc:permits rdf:resource="https://creativecommons.org/ns#Reproduction" />
	<cc:permits rdf:resource="https://creativecommons.org/ns#Distribution" />
	<cc:permits rdf:resource="https://creativecommons.org/ns#DerivativeWorks" />
</cc:License>

</rdf:RDF>
