-
Notifications
You must be signed in to change notification settings - Fork 26
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #68 from codefuse-ai/xxh_dev
[Feat]Add properties extractor source code and library files
- Loading branch information
Showing
69 changed files
with
4,872 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
# Introduction | ||
The codefuse-query properties extractor transforms the source code of properties files into standardized coref-properties data, which is then used by codefuse-query for further analysis. | ||
|
||
# Quick Start | ||
1. Set `JAVA_HOME`. Execute `echo $JAVA_HOME` to display its current setting. If it displays as empty, then it has not been configured yet. | ||
2. Build. Execute `mvn clean install`. | ||
3. Run. Execute `java -jar target/properties-extractor-1.0-SNAPSHOT-jar-with-dependencies.jar ${YOUR_REPO} ./db`. | ||
|
||
After execution, a file named coref_properties_src.db will be generated in the ./db directory. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
# 简介 | ||
Codefuse-query Properties 提取器将 Properties 文件的源代码转换为标准化的 coref-properties 数据,这些数据用于 codefuse-query 进行进一步分析。 | ||
|
||
# 快速开始 | ||
1. 设置 JAVA_HOME。执行 echo $JAVA_HOME 来显示当前的设置。如果显示为空,则表示尚未配置。 | ||
2. 构建。执行 mvn clean install。 | ||
3. 运行。执行 java -jar target/properties-extractor-1.0-SNAPSHOT-jar-with-dependencies.jar ${YOUR_REPO} ./db。 | ||
|
||
执行后,一个名为 coref_properties_src.db 的文件将生成在 ./db 目录下。 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,159 @@ | ||
<!--
  Maven build for the codefuse-query properties extractor.
  Produces a runnable "jar-with-dependencies" whose main class is
  com.alipay.codequery.properties.Extractor.
-->
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <groupId>com.alipay.codequery.properties</groupId>
    <artifactId>properties-extractor</artifactId>
    <version>1.0-SNAPSHOT</version>

    <packaging>jar</packaging>

    <name>properties-extractor</name>
    <url>http://maven.apache.org</url>

    <properties>
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
        <!--
          Single source of truth for all Log4j 2 artifacts so they cannot drift apart.
          2.17.1 is used instead of 2.14.1 because 2.14.1 is affected by Log4Shell
          (CVE-2021-44228) and the follow-up CVE-2021-45046 / CVE-2021-45105 / CVE-2021-44832.
        -->
        <log4j.version>2.17.1</log4j.version>
    </properties>

    <dependencies>
        <dependency>
            <groupId>junit</groupId>
            <artifactId>junit</artifactId>
            <version>4.12</version>
            <scope>test</scope>
        </dependency>
        <!-- https://mvnrepository.com/artifact/org.apache.commons/commons-lang3 -->
        <dependency>
            <groupId>org.apache.commons</groupId>
            <artifactId>commons-lang3</artifactId>
            <version>3.12.0</version>
        </dependency>
        <!-- https://mvnrepository.com/artifact/commons-codec/commons-codec -->
        <dependency>
            <groupId>commons-codec</groupId>
            <artifactId>commons-codec</artifactId>
            <version>1.15</version>
        </dependency>
        <!-- Compile-time only: Lombok annotations are processed by javac and not needed at runtime. -->
        <dependency>
            <groupId>org.projectlombok</groupId>
            <artifactId>lombok</artifactId>
            <version>1.18.16</version>
            <scope>provided</scope>
        </dependency>

        <dependency>
            <groupId>org.xerial</groupId>
            <artifactId>sqlite-jdbc</artifactId>
            <version>3.36.0.2</version>
        </dependency>

        <dependency>
            <groupId>org.mybatis</groupId>
            <artifactId>mybatis</artifactId>
            <version>3.5.7</version>
        </dependency>

        <dependency>
            <groupId>tk.mybatis</groupId>
            <artifactId>mapper</artifactId>
            <!-- Using the latest version is recommended; look it up on the project home page. -->
            <version>4.1.5</version>
        </dependency>

        <dependency>
            <groupId>org.apache.logging.log4j</groupId>
            <artifactId>log4j-core</artifactId>
            <version>${log4j.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.logging.log4j</groupId>
            <artifactId>log4j-api</artifactId>
            <version>${log4j.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.logging.log4j</groupId>
            <artifactId>log4j-slf4j-impl</artifactId>
            <version>${log4j.version}</version>
        </dependency>
        <dependency>
            <groupId>info.picocli</groupId>
            <artifactId>picocli</artifactId>
            <version>4.6.1</version>
        </dependency>

    </dependencies>
    <build>
        <plugins>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-compiler-plugin</artifactId>
                <configuration>
                    <source>8</source>
                    <target>8</target>
                </configuration>
            </plugin>
            <!-- Tests are skipped during the build. -->
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-surefire-plugin</artifactId>
                <version>2.4.2</version>
                <configuration>
                    <skipTests>true</skipTests>
                </configuration>
            </plugin>
            <plugin>
                <groupId>org.mybatis.generator</groupId>
                <artifactId>mybatis-generator-maven-plugin</artifactId>
                <version>1.3.7</version>
                <configuration>
                    <verbose>true</verbose>
                    <overwrite>true</overwrite>
                </configuration>
                <dependencies>
                    <dependency>
                        <groupId>org.xerial</groupId>
                        <artifactId>sqlite-jdbc</artifactId>
                        <version>3.36.0.2</version>
                    </dependency>
                    <dependency>
                        <groupId>tk.mybatis</groupId>
                        <artifactId>mapper</artifactId>
                        <version>4.1.5</version>
                    </dependency>
                </dependencies>
                <executions>
                    <execution>
                        <id>Generate MyBatis Artifacts</id>
                        <goals>
                            <goal>generate</goal>
                        </goals>
                    </execution>
                </executions>
            </plugin>
            <!-- Builds target/properties-extractor-<version>-jar-with-dependencies.jar during "package". -->
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-assembly-plugin</artifactId>
                <version>2.5.5</version>
                <configuration>
                    <archive>
                        <manifest>
                            <mainClass>com.alipay.codequery.properties.Extractor</mainClass>
                        </manifest>
                    </archive>
                    <descriptorRefs>
                        <descriptorRef>jar-with-dependencies</descriptorRef>
                    </descriptorRefs>
                </configuration>
                <executions>
                    <execution>
                        <id>make-assembly</id>
                        <phase>package</phase>
                        <goals>
                            <goal>single</goal>
                        </goals>
                    </execution>
                </executions>
            </plugin>

        </plugins>
    </build>
</project>
141 changes: 141 additions & 0 deletions
141
language/properties/extractor/src/main/java/com/alipay/codequery/properties/Extractor.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,141 @@ | ||
|
||
package com.alipay.codequery.properties; | ||
|
||
import com.alipay.codequery.properties.core.CorefExtractor; | ||
import com.alipay.codequery.properties.model.Folder; | ||
import com.alipay.codequery.properties.model.Node; | ||
import com.alipay.codequery.properties.model.Program; | ||
import com.alipay.codequery.properties.storage.CorefStorage; | ||
import com.alipay.codequery.properties.core.CorefURI; | ||
import com.alipay.codequery.properties.util.LoggerUtil; | ||
import lombok.extern.slf4j.Slf4j; | ||
import org.apache.commons.lang3.StringUtils; | ||
import org.apache.logging.log4j.Level; | ||
import org.apache.logging.log4j.LogManager; | ||
import org.apache.logging.log4j.Logger; | ||
import picocli.CommandLine; | ||
import picocli.CommandLine.Command; | ||
import picocli.CommandLine.Parameters; | ||
|
||
import java.io.*; | ||
import java.util.concurrent.Callable; | ||
|
||
|
||
@Command(name = "extract", mixinStandardHelpOptions = true, version = "extract 1.0", | ||
description = "extract COREF-Properties db from a src directory.") | ||
@Slf4j | ||
public class Extractor implements Callable<Integer> { | ||
|
||
private static final Logger logger = LogManager.getLogger(Extractor.class); | ||
@Parameters(index = "0", description = "The source directory to extract.") | ||
private File srcRootDir; | ||
|
||
@Parameters(index = "1", description = "The output directory for the DB file.") | ||
private File dbDir; | ||
|
||
@CommandLine.Option(names = {"--corpus"}, description = "Specify the corpus of the codebase.") | ||
private String corpus = ""; | ||
|
||
/** | ||
* | ||
* main method. | ||
*/ | ||
public static void main(String[] args) { | ||
int exitCode = new CommandLine(new Extractor()).execute(args); | ||
System.exit(exitCode); | ||
} | ||
|
||
private void parse(File rootDir, CorefStorage corefStorage, CorefURI corefURI) throws IOException{ | ||
File[] files = rootDir.listFiles(); | ||
for (File file : files) { | ||
if (file.isDirectory()) { | ||
parse(file, corefStorage, corefURI); | ||
} else { | ||
|
||
// Support extracting file's extension is 'properties' or 'properties.vm'. | ||
if (file.getName().endsWith(".properties") || file.getName().endsWith(".properties.vm")) { | ||
logger.info("Start Extracting properties file: {}", file.getAbsolutePath()); | ||
try { | ||
CorefExtractor extractor = new CorefExtractor(file, corefStorage, srcRootDir.getAbsolutePath(), corefURI); | ||
extractor.parse(); | ||
} catch (Exception e) { | ||
logger.error("Extraction failed, error message:{} on file {}", e.getMessage(), file.getAbsolutePath()); | ||
} | ||
} | ||
} | ||
} | ||
} | ||
|
||
private Program createProgramNode(String repoDir, CorefStorage corefStorage, CorefURI corefURI) { | ||
Program program = new Program(); | ||
program.oid = corefURI.generateCorpusOid(); | ||
program.prefix = repoDir; | ||
corefStorage.storeProgram(program.extractProgram()); | ||
return program; | ||
} | ||
|
||
private void visitDirectory(String repoDir, Node parent, CorefStorage corefStorage, CorefURI corefURI) { | ||
File file = new File(repoDir); | ||
|
||
// Ignore the folder starts with "." | ||
if (file.isDirectory() && !(file.getName().startsWith("."))) { | ||
String absolutePath = file.getAbsolutePath(); | ||
|
||
Folder parentFolder = new Folder(); | ||
parentFolder.name = file.getName(); | ||
parentFolder.parent = parent; | ||
|
||
// Calculate the relative path of the folder. | ||
if (absolutePath.endsWith(srcRootDir.getAbsolutePath())) { | ||
parentFolder.relativePath = "ROOT"; | ||
} else { | ||
char head = repoDir.charAt(0); | ||
switch (head) { | ||
case '/': | ||
parentFolder.relativePath = absolutePath.substring(srcRootDir.getAbsolutePath().length() + 1); | ||
break; | ||
case '.': | ||
parentFolder.relativePath = absolutePath.substring(absolutePath.indexOf(repoDir) + 2); | ||
break; | ||
default: | ||
parentFolder.relativePath = absolutePath.substring(absolutePath.indexOf(repoDir)); | ||
} | ||
} | ||
corefURI.setPath(parentFolder.relativePath); | ||
parentFolder.oid = corefURI.generateFileOid(); | ||
corefStorage.storeFolder(parentFolder.extractFolder()); | ||
|
||
// Recursively visit the sub folders. | ||
for (File f : file.listFiles()) { | ||
if (f.isDirectory()) { | ||
visitDirectory(f.getAbsolutePath(), parentFolder, corefStorage, corefURI); | ||
} else if (f.getName().endsWith(".properties")) { | ||
CorefURI.fileMap.put(f.getAbsolutePath(), parentFolder.oid); | ||
} | ||
} | ||
} | ||
} | ||
|
||
/** | ||
* Override the call method. | ||
* @return | ||
* @throws Exception | ||
*/ | ||
@Override | ||
public Integer call() throws Exception { | ||
LoggerUtil.initLogger(Level.INFO); | ||
|
||
long start = System.currentTimeMillis(); | ||
CorefStorage corefStorage = new CorefStorage(dbDir.getAbsolutePath()); | ||
CorefURI corefURI = StringUtils.isBlank(corpus) ? new CorefURI(srcRootDir.getAbsolutePath()) : new CorefURI(corpus); | ||
Program program = createProgramNode(srcRootDir.getAbsolutePath(), corefStorage, corefURI); | ||
visitDirectory(srcRootDir.getAbsolutePath(), program, corefStorage, corefURI); | ||
|
||
parse(srcRootDir, corefStorage, corefURI); | ||
corefStorage.store(); | ||
|
||
logger.info("Time to completion (TTC): " + (System.currentTimeMillis() - start)); | ||
|
||
return 0; | ||
} | ||
} |
Oops, something went wrong.