Skip to content

Commit

Permalink
Merge pull request #83 from jamebal/ocr
Browse files Browse the repository at this point in the history
feat: 添加OCR支持
  • Loading branch information
jamebal authored May 31, 2024
2 parents 315ab00 + 917f5c0 commit 70f6859
Show file tree
Hide file tree
Showing 30 changed files with 580 additions and 272 deletions.
3 changes: 2 additions & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ ENV LOG_LEVEL warn

ENV FILE_MONITOR true
ENV FILE_ROOT_DIR /jmalcloud/files
ENV TESS4J_DATA_PATH /jmalcloud/tess4j/datapath

ADD target/clouddisk-${VERSION}.jar /usr/local/

Expand All @@ -27,4 +28,4 @@ ENV DOCKER_DEFAULT_PLATFORM=linux/amd64,linux/arm64

EXPOSE 8088

CMD java -Dfile.encoding=UTF-8 -Dloader.path=/usr/local/clouddisk-lib -jar ${JVM_OPTS} /usr/local/clouddisk-${VERSION}.jar --spring.profiles.active=${RUN_ENVIRONMENT} --spring.data.mongodb.uri=${MONGODB_URI} --file.monitor=${FILE_MONITOR} --file.rootDir=${FILE_ROOT_DIR} --logging.level.root=${LOG_LEVEL} --file.ip2region-db-path=/jmalcloud/ip2region.xdb
CMD java -Dfile.encoding=UTF-8 -Dloader.path=/usr/local/clouddisk-lib -jar ${JVM_OPTS} /usr/local/clouddisk-${VERSION}.jar --spring.profiles.active=${RUN_ENVIRONMENT} --spring.data.mongodb.uri=${MONGODB_URI} --tess4j.data-path=${TESS4J_DATA_PATH} --file.monitor=${FILE_MONITOR} --file.rootDir=${FILE_ROOT_DIR} --logging.level.root=${LOG_LEVEL} --file.ip2region-db-path=/jmalcloud/ip2region.xdb
12 changes: 7 additions & 5 deletions docker/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
FROM eclipse-temurin:17-jre

# 安装 ffmpeg
# 安装 ffmpeg 和 tesseract
RUN apt-get update && \
apt-get install -y ffmpeg libavcodec-extra && apt-get install -y locales && \
locale-gen en_US.UTF-8 && \
update-locale LANG=en_US.UTF-8
apt-get install -y ffmpeg libavcodec-extra locales tesseract-ocr && \
locale-gen en_US.UTF-8 && \
update-locale LANG=en_US.UTF-8

# 设置环境变量
ENV LANG=en_US.UTF-8 \
Expand All @@ -14,8 +14,10 @@ ENV LANG=en_US.UTF-8 \
# 设置时区
RUN ln -sf /usr/share/zoneinfo/Asia/Shanghai /etc/localtime

RUN mkdir -p /jmalcloud/files
RUN mkdir -p /jmalcloud/files /jmalcloud/tess4j/datapath

ADD docker/ip2region.xdb /jmalcloud/

ADD tess4j/datapath/chi_sim.traineddata /jmalcloud/tess4j/datapath/

ADD target/lib /usr/local/clouddisk-lib
3 changes: 2 additions & 1 deletion docker/jmalcloud-nvidia/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ ENV LOG_LEVEL warn

ENV FILE_MONITOR true
ENV FILE_ROOT_DIR /jmalcloud/files
ENV TESS4J_DATA_PATH /jmalcloud/tess4j/datapath

ADD target/clouddisk-${VERSION}.jar /usr/local/

Expand All @@ -27,4 +28,4 @@ ENV DOCKER_DEFAULT_PLATFORM=linux/amd64,linux/arm64

EXPOSE 8088

CMD java -Dfile.encoding=UTF-8 -Dloader.path=/usr/local/clouddisk-lib -jar ${JVM_OPTS} /usr/local/clouddisk-${VERSION}.jar --spring.profiles.active=${RUN_ENVIRONMENT} --spring.data.mongodb.uri=${MONGODB_URI} --file.monitor=${FILE_MONITOR} --file.rootDir=${FILE_ROOT_DIR} --logging.level.root=${LOG_LEVEL} --file.ip2region-db-path=/jmalcloud/ip2region.xdb
CMD java -Dfile.encoding=UTF-8 -Dloader.path=/usr/local/clouddisk-lib -jar ${JVM_OPTS} /usr/local/clouddisk-${VERSION}.jar --spring.profiles.active=${RUN_ENVIRONMENT} --spring.data.mongodb.uri=${MONGODB_URI} --tess4j.data-path=${TESS4J_DATA_PATH} --file.monitor=${FILE_MONITOR} --file.rootDir=${FILE_ROOT_DIR} --logging.level.root=${LOG_LEVEL} --file.ip2region-db-path=/jmalcloud/ip2region.xdb
9 changes: 6 additions & 3 deletions docker/nvidia/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@ FROM nvidia/cuda:11.7.1-base-ubuntu22.04 AS base
# 设置非交互式安装,避免 tzdata 等包的配置暂停
ENV DEBIAN_FRONTEND=noninteractive

# 安装wget
RUN apt-get update && apt-get install -y wget && apt-get install -y locales && \
# 安装wget 和 tesseract
RUN apt-get update && apt-get install -y wget && apt-get install -y locales tesseract-ocr && \
locale-gen en_US.UTF-8 && \
update-locale LANG=en_US.UTF-8

Expand Down Expand Up @@ -42,10 +42,13 @@ COPY --from=java-base /opt/java/openjdk /opt/java/openjdk
# 设置时区
RUN ln -sf /usr/share/zoneinfo/Asia/Shanghai /etc/localtime

RUN mkdir -p /jmalcloud/files
RUN mkdir -p /jmalcloud/files /jmalcloud/tess4j/datapath

ADD docker/ip2region.xdb /jmalcloud/

ADD tess4j/datapath/chi_sim.traineddata /jmalcloud/tess4j/datapath/


ADD target/lib /usr/local/clouddisk-lib

# 更新 PATH 和 LD_LIBRARY_PATH
Expand Down
10 changes: 8 additions & 2 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,12 @@
<version>0.2.2</version>
</dependency>

<dependency>
<groupId>net.sourceforge.tess4j</groupId>
<artifactId>tess4j</artifactId>
<version>5.11.0</version>
</dependency>

<!-- 反射-->
<dependency>
<groupId>org.reflections</groupId>
Expand Down Expand Up @@ -206,7 +212,7 @@
<dependency>
<groupId>org.apache.pdfbox</groupId>
<artifactId>pdfbox</artifactId>
<version>2.0.28</version>
<version>3.0.2</version>
<exclusions>
<exclusion>
<groupId>commons-logging</groupId>
Expand Down Expand Up @@ -285,7 +291,7 @@
<dependency>
<groupId>com.qcloud</groupId>
<artifactId>cos_api</artifactId>
<version>5.6.210</version>
<version>5.6.213</version>
<exclusions>
<exclusion>
<groupId>commons-logging</groupId>
Expand Down
11 changes: 11 additions & 0 deletions src/main/java/com/jmal/clouddisk/ClouddiskApplication.java
Original file line number Diff line number Diff line change
@@ -1,11 +1,15 @@
package com.jmal.clouddisk;

import cn.hutool.core.io.FileUtil;
import cn.hutool.crypto.SecureUtil;
import org.springframework.boot.SpringApplication;
import org.springframework.boot.autoconfigure.SpringBootApplication;
import org.springframework.cache.annotation.EnableCaching;
import org.springframework.scheduling.annotation.EnableScheduling;

import java.nio.file.Path;
import java.nio.file.Paths;

/**
* ClouddiskApplication
*
Expand All @@ -21,6 +25,13 @@ public static void main(String[] args) {
SpringApplication application = new SpringApplication(ClouddiskApplication.class);
// 允许循环引用
application.setAllowCircularReferences(true);

// dev环境下设置tesseract的lib路径
Path tesseractLibPath = Paths.get("/opt/homebrew/Cellar/tesseract/5.3.4_1/lib");
if (FileUtil.exist(tesseractLibPath.toFile())) {
System.setProperty("jna.library.path", tesseractLibPath.toString());
}

application.run(args);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,19 +3,20 @@
import com.jmal.clouddisk.annotation.LogOperatingFun;
import com.jmal.clouddisk.annotation.Permission;
import com.jmal.clouddisk.interceptor.AuthInterceptor;
import com.jmal.clouddisk.model.LdapConfigDTO;
import com.jmal.clouddisk.model.LogOperation;
import com.jmal.clouddisk.model.rbac.ConsumerDTO;
import com.jmal.clouddisk.service.IAuthService;
import com.jmal.clouddisk.service.IUserService;
import com.jmal.clouddisk.util.ResponseResult;
import com.jmal.clouddisk.util.ResultUtil;
import io.swagger.v3.oas.annotations.Operation;
import io.swagger.v3.oas.annotations.tags.Tag;
import jakarta.servlet.http.HttpServletRequest;
import jakarta.servlet.http.HttpServletResponse;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.web.bind.annotation.*;
import org.springframework.web.bind.annotation.GetMapping;
import org.springframework.web.bind.annotation.PostMapping;
import org.springframework.web.bind.annotation.RequestBody;
import org.springframework.web.bind.annotation.RestController;

/**
* 登录、登出、验证
Expand All @@ -39,28 +40,6 @@ public ResponseResult<Object> login(@RequestBody ConsumerDTO userDTO, HttpServle
return authService.login(response, userDTO);
}

@Operation(summary = "加载ldap配置")
@LogOperatingFun(logType = LogOperation.Type.LOGIN)
@GetMapping("/ldap/config")
public ResponseResult<Object> loadLdapConfig() {
return ResultUtil.success(authService.loadLdapConfig());
}

@Operation(summary = "ldap配置")
@LogOperatingFun(logType = LogOperation.Type.LOGIN)
@PutMapping("/ldap/config")
public ResponseResult<Object> updateLdapConfig(@RequestBody LdapConfigDTO ldapConfigDTO) {
return authService.updateLdapConfig(ldapConfigDTO);
}

@Operation(summary = "测试ldap配置")
@LogOperatingFun(logType = LogOperation.Type.BROWSE)
@PutMapping("/ldap/test-config")
public ResponseResult<Object> testLdapConfig(@RequestBody LdapConfigDTO ldapConfigDTO) {
authService.testLdapConfig(ldapConfigDTO);
return ResultUtil.success();
}

@Operation(summary = "校验旧密码")
@PostMapping("/valid-old-pass")
@LogOperatingFun(logType = LogOperation.Type.BROWSE)
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
package com.jmal.clouddisk.controller.rest;

import com.jmal.clouddisk.annotation.LogOperatingFun;
import com.jmal.clouddisk.annotation.Permission;
import com.jmal.clouddisk.model.LdapConfigDTO;
import com.jmal.clouddisk.model.LogOperation;
import com.jmal.clouddisk.service.IAuthService;
import com.jmal.clouddisk.service.IUserService;
import com.jmal.clouddisk.service.impl.SettingService;
import com.jmal.clouddisk.util.ResponseResult;
import com.jmal.clouddisk.util.ResultUtil;
import io.swagger.v3.oas.annotations.Operation;
import io.swagger.v3.oas.annotations.tags.Tag;
import lombok.RequiredArgsConstructor;
import org.springframework.web.bind.annotation.*;
import org.springframework.web.multipart.MultipartFile;

/**
 * REST endpoints for the cloud-disk settings area (Swagger tag "网盘设置").
 *
 * <p>Every handler is a thin delegation to {@link SettingService},
 * {@link IUserService} or {@link IAuthService}; no business logic lives in
 * this class. The collaborators are injected through the constructor that
 * Lombok generates for the {@code final} fields.
 */
@Tag(name = "网盘设置")
@RestController
@RequiredArgsConstructor
public class CloudSettingController {

    private final SettingService settingService;

    private final IUserService userService;

    private final IAuthService authService;

    /**
     * Triggers the "rebuild index" operation.
     *
     * @return whatever {@code settingService.sync(null)} produces; the meaning of
     *         the {@code null} argument is defined by the service, not here
     */
    @LogOperatingFun
    @Permission(value = "cloud:set:sync")
    @GetMapping("/user/setting/sync")
    @Operation(summary = "重建索引")
    public ResponseResult<Object> sync() {
        ResponseResult<Object> syncResult = settingService.sync(null);
        return syncResult;
    }

    /**
     * Uploads the logo of the cloud disk.
     *
     * <p>NOTE(review): guarded by the same {@code cloud:set:sync} permission as
     * the index rebuild — confirm that sharing the permission is intentional.
     *
     * @param file the uploaded logo, handed to the service unchanged
     * @return the result reported by {@code settingService.uploadLogo}
     */
    @LogOperatingFun
    @Permission(value = "cloud:set:sync")
    @PostMapping("/user/setting/upload_logo")
    @Operation(summary = "上传网盘logo")
    public ResponseResult<Object> uploadLogo(MultipartFile file) {
        ResponseResult<Object> uploadResult = settingService.uploadLogo(file);
        return uploadResult;
    }

    /**
     * Changes the display name of the cloud disk.
     *
     * @param netdiskName the new name, taken from the request parameter
     * @return the result reported by {@code settingService.updateNetdiskName}
     */
    @LogOperatingFun
    @Permission(value = "cloud:set:sync")
    @PutMapping("/user/setting/update_netdisk_name")
    @Operation(summary = "修改网盘名称")
    public ResponseResult<Object> updateNetdiskName(@RequestParam String netdiskName) {
        ResponseResult<Object> renameResult = settingService.updateNetdiskName(netdiskName);
        return renameResult;
    }

    /**
     * Reports whether a synchronisation is currently running.
     *
     * <p>NOTE(review): no {@code @Permission} is declared on this handler —
     * confirm that is deliberate.
     *
     * @param username the user name passed through to the service
     * @return the result reported by {@code settingService.isSync}
     */
    @LogOperatingFun
    @GetMapping("/user/setting/isSync")
    @Operation(summary = "是否正在同步")
    public ResponseResult<Object> isSync(@RequestParam String username) {
        ResponseResult<Object> syncState = settingService.isSync(username);
        return syncState;
    }

    /**
     * Resets roles and menus; restricted via {@code @Permission(onlyCreator = true)}.
     *
     * @return a plain success result once the service call has returned
     */
    @LogOperatingFun
    @Permission(onlyCreator = true)
    @PutMapping("/user/setting/resetMenuAndRole")
    @Operation(summary = "重置角色菜单")
    public ResponseResult<Object> resetMenuAndRole() {
        settingService.resetMenuAndRole();
        return ResultUtil.success();
    }

    /**
     * Reads the "webp disabled" flag of a user.
     *
     * @param userId the id of the user whose flag is queried
     * @return a success result wrapping {@code userService.getDisabledWebp(userId)}
     */
    @LogOperatingFun(logType = LogOperation.Type.BROWSE)
    @Permission("sys:user:list")
    @GetMapping("/user/setting/get/webp")
    @Operation(summary = "获取是否禁用webp状态")
    public ResponseResult<Boolean> getDisabledWebp(@RequestParam String userId) {
        return ResultUtil.success(userService.getDisabledWebp(userId));
    }

    /**
     * Sets the "webp disabled" flag of a user.
     *
     * @param userId   the id of the user to update
     * @param disabled the new value of the flag, forwarded as received
     * @return a plain success result once the service call has returned
     */
    @LogOperatingFun
    @Permission("sys:user:update")
    @PutMapping("/user/setting/disabled/webp")
    @Operation(summary = "是否禁用webp(默认开启)")
    public ResponseResult<Object> disabledWebp(@RequestParam String userId, @RequestParam Boolean disabled) {
        userService.disabledWebp(userId, disabled);
        return ResultUtil.success();
    }

    /**
     * Loads the LDAP configuration.
     *
     * <p>NOTE(review): none of the three LDAP handlers in this class declares
     * {@code @Permission} — confirm access control is enforced elsewhere.
     *
     * @return a success result wrapping {@code authService.loadLdapConfig()}
     */
    @GetMapping("/ldap/config")
    @LogOperatingFun(logType = LogOperation.Type.LOGIN)
    @Operation(summary = "加载ldap配置")
    public ResponseResult<Object> loadLdapConfig() {
        return ResultUtil.success(authService.loadLdapConfig());
    }

    /**
     * Stores a new LDAP configuration.
     *
     * @param ldapConfigDTO the configuration deserialised from the request body
     * @return the result reported by {@code authService.updateLdapConfig}
     */
    @PutMapping("/ldap/config")
    @LogOperatingFun(logType = LogOperation.Type.LOGIN)
    @Operation(summary = "ldap配置")
    public ResponseResult<Object> updateLdapConfig(@RequestBody LdapConfigDTO ldapConfigDTO) {
        ResponseResult<Object> updateResult = authService.updateLdapConfig(ldapConfigDTO);
        return updateResult;
    }

    /**
     * Tests an LDAP configuration; the service result is not inspected here.
     *
     * @param ldapConfigDTO the configuration deserialised from the request body
     * @return a plain success result once {@code authService.testLdapConfig} has returned
     */
    @PutMapping("/ldap/test-config")
    @LogOperatingFun(logType = LogOperation.Type.BROWSE)
    @Operation(summary = "测试ldap配置")
    public ResponseResult<Object> testLdapConfig(@RequestBody LdapConfigDTO ldapConfigDTO) {
        authService.testLdapConfig(ldapConfigDTO);
        return ResultUtil.success();
    }

}
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

import com.jmal.clouddisk.model.FileIntroVO;
import com.jmal.clouddisk.model.query.SearchDTO;
import com.jmal.clouddisk.service.impl.LuceneService;
import com.jmal.clouddisk.lucene.LuceneService;
import com.jmal.clouddisk.util.ResponseResult;
import io.swagger.v3.oas.annotations.Operation;
import io.swagger.v3.oas.annotations.tags.Tag;
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
package com.jmal.clouddisk.controller.rest;

import cn.hutool.core.date.TimeInterval;
import cn.hutool.core.io.FileUtil;
import cn.hutool.http.HttpUtil;
import com.jmal.clouddisk.ocr.OcrService;
import io.swagger.v3.oas.annotations.tags.Tag;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.springframework.web.bind.annotation.GetMapping;
import org.springframework.web.bind.annotation.RequestParam;
import org.springframework.web.bind.annotation.RestController;

/**
 * REST endpoint that runs OCR on a file fetched from a URL.
 *
 * <p>NOTE(review): {@code fileUrl} is supplied by the caller and is downloaded
 * by the server as-is, and no {@code @Permission} is declared in this class.
 * Confirm that authentication and target-host restrictions are enforced
 * elsewhere, otherwise this is a server-side request forgery surface.
 */
@Slf4j
@Tag(name = "OCR")
@RestController
@RequiredArgsConstructor
public class OcrController {

    private final OcrService ocrService;

    /**
     * Downloads the file behind {@code fileUrl} to a temporary path obtained from
     * {@code ocrService.generateOrcTempImagePath()}, runs OCR on it and returns
     * the recognised text.
     *
     * @param fileUrl location of the file to recognise, passed unchanged to the downloader
     * @return the string produced by {@code ocrService.doOCR}
     */
    @GetMapping("/ocr")
    public String performOcr(@RequestParam String fileUrl) {
        final String downloadTarget = ocrService.generateOrcTempImagePath();
        try {
            HttpUtil.downloadFile(fileUrl, downloadTarget);
            return recognizeTimed(downloadTarget);
        } finally {
            // Remove the temporary file whether the download/OCR succeeded or threw.
            FileUtil.del(downloadTarget);
        }
    }

    /**
     * Runs OCR on the given path and logs the elapsed milliseconds.
     * The second {@code doOCR} argument is passed as {@code null}; its meaning is
     * defined by {@link OcrService}.
     */
    private String recognizeTimed(String imagePath) {
        TimeInterval stopwatch = new TimeInterval();
        stopwatch.start();
        String recognizedText = ocrService.doOCR(imagePath, null);
        log.info("OCR time consuming: {}", stopwatch.intervalMs());
        return recognizedText;
    }
}
Loading

0 comments on commit 70f6859

Please sign in to comment.