From 1ae2b934906d603afef6b158e139e17eefa0b69d Mon Sep 17 00:00:00 2001 From: chiDT Date: Wed, 17 Sep 2025 22:35:06 +0900 Subject: [PATCH] =?UTF-8?q?=EC=A0=84=EB=9D=BC=EB=82=A8=EB=8F=84=EB=A6=BD?= =?UTF-8?q?=EB=8F=84=EC=84=9C=EA=B4=80=20=EA=B2=80=EC=83=89=EA=B8=B0=20?= =?UTF-8?q?=EA=B5=AC=ED=98=84?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - JeonnamProvLibSearcher.cs 추가 (HTTP 방식) - 실제 사이트 URL 파라미터 구조 분석 및 적용 - HTML 결과 패턴 정규식 구현 (전체 N개가 검색되었습니다) - Check_copyWD.cs에 검색기 등록 - 버전 업데이트 (2025.09.17.2300) 🤖 Generated with [Claude Code](https://claude.ai/code) Co-Authored-By: Claude --- unimarc/unimarc/Properties/AssemblyInfo.cs | 4 +- .../SearchModel/JeonnamProvLibSearcher.cs | 263 ++++++++++++++++++ unimarc/unimarc/Skill.cs | 21 +- unimarc/unimarc/UniMarc.csproj | 1 + unimarc/unimarc/마크/Check_copyWD.cs | 2 + 5 files changed, 280 insertions(+), 11 deletions(-) create mode 100644 unimarc/unimarc/SearchModel/JeonnamProvLibSearcher.cs diff --git a/unimarc/unimarc/Properties/AssemblyInfo.cs b/unimarc/unimarc/Properties/AssemblyInfo.cs index 7b74652..3fe38ac 100644 --- a/unimarc/unimarc/Properties/AssemblyInfo.cs +++ b/unimarc/unimarc/Properties/AssemblyInfo.cs @@ -32,5 +32,5 @@ using System.Runtime.InteropServices; // 모든 값을 지정하거나 아래와 같이 '*'를 사용하여 빌드 번호 및 수정 번호를 // 기본값으로 할 수 있습니다. // [assembly: AssemblyVersion("1.0.*")] -[assembly: AssemblyVersion("2025.09.16.0000")] -[assembly: AssemblyFileVersion("2025.09.16.0000")] +[assembly: AssemblyVersion("2025.09.17.2300")] +[assembly: AssemblyFileVersion("2025.09.17.2300")] diff --git a/unimarc/unimarc/SearchModel/JeonnamProvLibSearcher.cs b/unimarc/unimarc/SearchModel/JeonnamProvLibSearcher.cs new file mode 100644 index 0000000..ca855ed --- /dev/null +++ b/unimarc/unimarc/SearchModel/JeonnamProvLibSearcher.cs @@ -0,0 +1,263 @@ +using System; +using System.Net.Http; +using System.Threading.Tasks; +using System.Text.RegularExpressions; +using System.Web; +using UniMarc.SearchModel; +using System.Text; +using OpenQA.Selenium.Support.UI; + +namespace BokBonCheck +{ + public class JeonnamProvLibSearcher : ILibrarySearcher + { + public string AreaCode { get; set; } = string.Empty; + public string SiteName { get; protected set; } + public string SiteUrl => "https://lib.jeonnam.go.kr/plus/search_list.php"; + public bool HttpApiMode { get; set; } = true; + + public int No { get; set; } + + private static readonly HttpClient _httpClient = new HttpClient() + { + DefaultRequestHeaders = + { + { "User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36" } + } + }; + + public JeonnamProvLibSearcher(int no, string areaCode, string areaName) + { + this.No = no; + this.AreaCode = areaCode; + this.SiteName = $"전라남도립({areaName})"; + } + + public async Task StartDriver(bool showdriver = false) + { + // HTTP 클라이언트 사용으로 별도 드라이버 불필요 + await Task.CompletedTask; + } + + public void StopDriver() + { + // HTTP 클라이언트 사용으로 별도 정리 불필요 + } + + public async Task SearchAsync(string searchTerm) + { + var result = new BookSearchResult + { + SiteName = SiteName, + SearchTerm = searchTerm, + SearchTime = DateTime.Now + }; + + try + { + // 검색어 URL 인코딩 + var encodedSearchTerm = HttpUtility.UrlEncode(searchTerm, Encoding.UTF8); + + // 실제 검색 URL 구성 (사용자가 확인한 정확한 파라미터 사용) + var searchUrl = $"{SiteUrl}?act=1&aon1=AND&msa=M&jongbook=1&value1={encodedSearchTerm}&field1=IAL&formclass=&local=&sort="; + + Console.WriteLine($"전라남도립도서관 검색 URL: {searchUrl}"); + + // HTTP GET 요청 실행 (추가 헤더 포함) + using (var request = new HttpRequestMessage(HttpMethod.Get, searchUrl)) + { + // 브라우저와 유사한 헤더 추가 + request.Headers.Add("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8"); + request.Headers.Add("Accept-Language", "ko-KR,ko;q=0.8,en-US;q=0.5,en;q=0.3"); + request.Headers.Add("Connection", "keep-alive"); + request.Headers.Add("Upgrade-Insecure-Requests", "1"); + request.Headers.Add("Referer", "https://lib.jeonnam.go.kr/plus/search_simple.php"); + + var response = await _httpClient.SendAsync(request); + + if (!response.IsSuccessStatusCode) + { + var errorContent = await response.Content.ReadAsStringAsync(); + throw new HttpRequestException($"HTTP {(int)response.StatusCode} {response.StatusCode}: {errorContent}"); + } + + var htmlContent = await response.Content.ReadAsStringAsync(); + + // 검색 결과 수 추출 + var resultCount = ExtractBookCount(htmlContent, out string errorMessage, out string resultHtml); + result.Resulthtml = resultHtml; + + if (resultCount == -1) + { + result.BookCount = 0; + result.IsSuccess = false; + result.ErrorMessage = errorMessage; + } + else + { + result.BookCount = resultCount; + result.IsSuccess = true; + result.ErrorMessage = $"검색성공({resultCount}권)"; + } + } + + } + catch (Exception ex) + { + result.IsSuccess = false; + result.ErrorMessage = $"검색 오류: {ex.Message}"; + result.BookCount = 0; + Console.WriteLine($"전라남도립도서관 검색 오류: {ex.Message}"); + } + + return result; + } + + private int ExtractBookCount(string htmlContent, out string errorMessage, out string resulthtml) + { + errorMessage = string.Empty; + resulthtml = string.Empty; + + try + { + // 전라남도립도서관 실제 HTML 패턴: 전체 1개가 검색되었습니다 + var patterns = new[] + { + @"]*>전체\s*(\d+)\s*개가\s*검색되었습니다", + @"'[^']*'\s*에\s*대하여\s*]*>전체\s*(\d+)\s*개가\s*검색되었습니다", + @"전체\s*(\d+)\s*개가\s*검색되었습니다" + }; + + foreach (var pattern in patterns) + { + var match = Regex.Match(htmlContent, pattern, RegexOptions.IgnoreCase); + if (match.Success) + { + if (int.TryParse(match.Groups[1].Value, out int count)) + { + if (count == 0) + { + errorMessage = "검색결과없음"; + resulthtml = match.Value; + return 0; + } + // 매칭된 부분과 그 상위 태그를 찾아서 저장 + resulthtml = ExtractResultContext(htmlContent, match); + errorMessage = $"검색성공({count}권)"; + return count; + } + } + } + + // Page X / Y 패턴으로도 확인 (총 페이지 수에서 결과 유무 판단) + var pagePattern = @"Page\s*\d+\s*/\s*(\d+)"; + var pageMatch = Regex.Match(htmlContent, pagePattern, RegexOptions.IgnoreCase); + if (pageMatch.Success) + { + if (int.TryParse(pageMatch.Groups[1].Value, out int totalPages)) + { + if (totalPages == 0) + { + errorMessage = "검색결과없음"; + resulthtml = pageMatch.Value; + return 0; + } + // 페이지가 있지만 정확한 개수를 알 수 없는 경우 -1 반환 + resulthtml = pageMatch.Value; + errorMessage = "결과수량을찾을수없음"; + return -1; + } + } + + // 검색 결과가 없다는 메시지 확인 + if (htmlContent.Contains("검색결과가 없습니다") || + htmlContent.Contains("검색된 자료가 없습니다") || + htmlContent.Contains("자료가 없습니다") || + htmlContent.Contains("개가 검색되었습니다") && !Regex.IsMatch(htmlContent, @"\d+\s*개가")) + { + errorMessage = "검색결과없음"; + resulthtml = "검색결과없음"; + return 0; + } + + // 패턴을 찾지 못한 경우 + resulthtml = "검색결과 패턴을 찾을 수 없음"; + errorMessage = "검색결과 패턴을 찾을 수 없음"; + return -1; + } + catch (Exception ex) + { + errorMessage = $"결과 분석 오류: {ex.Message}"; + resulthtml = "검색결과 패턴을 찾을 수 없음"; + return -1; + } + } + + public Task WaitForPageChange(WebDriverWait wait) + { + throw new NotImplementedException(); + } + + /// + /// 매칭된 결과와 그 상위 태그를 추출 + /// + private string ExtractResultContext(string htmlContent, Match match) + { + try + { + var matchIndex = match.Index; + var matchLength = match.Length; + + // 매칭된 위치 앞쪽에서 상위 태그 시작 찾기 + var startSearchIndex = Math.Max(0, matchIndex - 200); // 매칭 위치 200자 전부터 검색 + var searchText = htmlContent.Substring(startSearchIndex, matchIndex - startSearchIndex + matchLength + Math.Min(200, htmlContent.Length - matchIndex - matchLength)); + + // 상위 태그 패턴들 (div, p, h1-h6, span 등) + var tagPatterns = new[] { @"<(div|p|h[1-6]|span|section|article)[^>]*>", @"<[^>]+>" }; + + string resultContext = match.Value; // 기본값은 매칭된 부분만 + + foreach (var tagPattern in tagPatterns) + { + // 매칭된 부분 앞에서 가장 가까운 태그 시작 찾기 + var tagMatches = Regex.Matches(searchText, tagPattern, RegexOptions.IgnoreCase); + + for (int i = tagMatches.Count - 1; i >= 0; i--) + { + var tagMatch = tagMatches[i]; + if (tagMatch.Index < (matchIndex - startSearchIndex)) + { + // 태그 이름 추출 + var tagName = Regex.Match(tagMatch.Value, @"<(\w+)", RegexOptions.IgnoreCase).Groups[1].Value; + + // 닫는 태그 찾기 + var closeTagPattern = $@"]*>"; + var closeMatch = Regex.Match(searchText, closeTagPattern, RegexOptions.IgnoreCase); + + if (closeMatch.Success && closeMatch.Index > (matchIndex - startSearchIndex)) + { + // 상위 태그와 그 내용을 포함하여 반환 + var startIdx = tagMatch.Index; + var endIdx = closeMatch.Index + closeMatch.Length; + resultContext = searchText.Substring(startIdx, Math.Min(endIdx - startIdx, 500)); // 최대 500자 + return resultContext; + } + } + } + } + + // 상위 태그를 찾지 못한 경우, 매칭 전후 50자씩 포함 + var contextStart = Math.Max(0, matchIndex - 50); + var contextEnd = Math.Min(htmlContent.Length, matchIndex + matchLength + 50); + resultContext = htmlContent.Substring(contextStart, contextEnd - contextStart); + + return resultContext; + } + catch (Exception ex) + { + Console.WriteLine($"ExtractResultContext 오류: {ex.Message}"); + return match.Value; // 오류 시 매칭된 부분만 반환 + } + } + } +} \ No newline at end of file diff --git a/unimarc/unimarc/Skill.cs b/unimarc/unimarc/Skill.cs index 373c040..8eebd8a 100644 --- a/unimarc/unimarc/Skill.cs +++ b/unimarc/unimarc/Skill.cs @@ -23,6 +23,7 @@ using System.Drawing.Text; using System.Globalization; using System.Threading; using System.Data.SqlTypes; +using AR; namespace WindowsFormsApp1 { @@ -135,14 +136,16 @@ namespace WindowsFormsApp1 // stringInClipboard= stringInClipboard.Replace("\r", ""); if (stringInClipboard == null) return; List rowsInClipboard = stringInClipboard.Split(rowSpliteter, StringSplitOptions.None).ToList(); - rowsInClipboard.RemoveAt(rowsInClipboard.Count - 1); - //get the row and column of selected cell in dataGridView1 - int r = ((DataGridView)sender).SelectedCells[0].RowIndex; - int c = ((DataGridView)sender).SelectedCells[0].ColumnIndex; + if(rowsInClipboard.Last().isEmpty()) rowsInClipboard.RemoveAt(rowsInClipboard.Count - 1); + + var dv = sender as DataGridView; + + int r = dv.SelectedCells[0].RowIndex; + int c = dv.SelectedCells[0].ColumnIndex; //add rows into dataGridView1 to fit clipboard lines - if (((DataGridView)sender).Rows.Count < (r + rowsInClipboard.Count)) + if (dv.Rows.Count < (r + rowsInClipboard.Count)) { - ((DataGridView)sender).Rows.Add(r + rowsInClipboard.Count - ((DataGridView)sender).Rows.Count); + dv.Rows.Add(r + rowsInClipboard.Count - dv.Rows.Count); } // loop through the lines, split them into cells and place the values in the corresponding cell. for (int iRow = 0; iRow < rowsInClipboard.Count; iRow++) @@ -153,10 +156,10 @@ namespace WindowsFormsApp1 for (int iCol = 0; iCol < valuesInRow.Length; iCol++) { //assign cell value, only if it within columns of the dataGridView1 - if (((DataGridView)sender).ColumnCount - 1 >= c + iCol) + if (dv.ColumnCount - 1 >= c + iCol) { - if (((DataGridView)sender).Rows.Count <= r + iRow) continue; - ((DataGridView)sender).Rows[r + iRow].Cells[c + iCol].Value = valuesInRow[iCol]; + if (dv.Rows.Count <= r + iRow) continue; + dv.Rows[r + iRow].Cells[c + iCol].Value = valuesInRow[iCol]; } } } diff --git a/unimarc/unimarc/UniMarc.csproj b/unimarc/unimarc/UniMarc.csproj index 727ccc0..6f43aa8 100644 --- a/unimarc/unimarc/UniMarc.csproj +++ b/unimarc/unimarc/UniMarc.csproj @@ -249,6 +249,7 @@ + diff --git a/unimarc/unimarc/마크/Check_copyWD.cs b/unimarc/unimarc/마크/Check_copyWD.cs index 00422f6..044b062 100644 --- a/unimarc/unimarc/마크/Check_copyWD.cs +++ b/unimarc/unimarc/마크/Check_copyWD.cs @@ -422,6 +422,8 @@ namespace WindowsFormsApp1.Mac _searchService.AddSearcher(new GochangLibSearcher(idx++, "MH", "흥덕가온누리작은도서관")); _searchService.AddSearcher(new GochangLibSearcher(idx++, "MI", "공음참나무골작은도서관")); + //전라남도립도서관 추가 250917 + _searchService.AddSearcher(new JeonnamProvLibSearcher(idx++, "all", "전라남도립도서관")); this.tb_SearchTarget.Items.Clear(); // this.tb_SearchTarget.Items.Add("-- 검색대상을 선택하세요 --");