Commit 6c35aff82d637e6c4f15bb55bc1490a4fe3de221

Authored by tangwang
1 parent d350861f

索引结构修改:

一、tags字段改为支持多语言:
spu表的tags字段与title走相同的翻译逻辑,填入原始语言、zh、en三种语言的内容。

检查以下字段,确保都与title一样走翻译逻辑:
title
keywords
tags
brief
description
vendor
category_path
category_name_text

二、/indexer/enrich-content接口的修改:
1. 请求参数去掉language:接口返回的内容直接对应索引结构,调用方无需再做处理,因此不需要指定语言,以降低耦合。
2. 返回enriched_attributes、enriched_tags、qanchors三个字段,按原始内容填入。
3. enriched_tags是本次新增的字段,注意区别于tags字段:tags字段来源于mysql的spu表,enriched_tags由本接口返回。

三、specifications的value也需要翻译,同样需要填入中文和英文,例如:
{
  "specifications": [
    {
      "sku_id": "sku-red-s",
      "name": "color",
      "value_keyword": "красный",
      "value_text": {
        "zh": "红色",
        "en": "red"
      }
    }
  ]
}
mappings/generate_search_products_mapping.py
... ... @@ -162,7 +162,6 @@ TEXT_EMBEDDING_SIZE = 1024
162 162 IMAGE_EMBEDDING_SIZE = 768
163 163  
164 164 FIELD_SPECS = [
165   - scalar_field("tenant_id", "keyword"),
166 165 scalar_field("spu_id", "keyword"),
167 166 scalar_field("create_time", "date"),
168 167 scalar_field("update_time", "date"),
... ...
mappings/search_products.json
... ... @@ -41,9 +41,6 @@
41 41 },
42 42 "mappings": {
43 43 "properties": {
44   - "tenant_id": {
45   - "type": "keyword"
46   - },
47 44 "spu_id": {
48 45 "type": "keyword"
49 46 },
... ... @@ -1681,20 +1678,6 @@
1681 1678 }
1682 1679 }
1683 1680 },
1684   - "qanchors": {
1685   - "type": "object",
1686   - "properties": {
1687   - "zh": {
1688   - "type": "text",
1689   - "analyzer": "index_ik",
1690   - "search_analyzer": "query_ik"
1691   - },
1692   - "en": {
1693   - "type": "text",
1694   - "analyzer": "english"
1695   - }
1696   - }
1697   - },
1698 1681 "tags": {
1699 1682 "type": "object",
1700 1683 "properties": {
... ... @@ -1718,326 +1701,326 @@
1718 1701 "normalizer": "lowercase"
1719 1702 }
1720 1703 }
1721   - }
1722   - }
1723   - },
1724   - "category_id": {
1725   - "type": "keyword"
1726   - },
1727   - "category_name": {
1728   - "type": "keyword"
1729   - },
1730   - "category_level": {
1731   - "type": "integer"
1732   - },
1733   - "category1_name": {
1734   - "type": "keyword"
1735   - },
1736   - "category2_name": {
1737   - "type": "keyword"
1738   - },
1739   - "category3_name": {
1740   - "type": "keyword"
1741   - },
1742   - "specifications": {
1743   - "type": "nested",
1744   - "properties": {
1745   - "sku_id": {
1746   - "type": "keyword"
1747 1704 },
1748   - "name": {
1749   - "type": "keyword"
  1705 + "ar": {
  1706 + "type": "text",
  1707 + "analyzer": "arabic",
  1708 + "fields": {
  1709 + "keyword": {
  1710 + "type": "keyword",
  1711 + "normalizer": "lowercase"
  1712 + }
  1713 + }
1750 1714 },
1751   - "value": {
1752   - "type": "keyword"
  1715 + "hy": {
  1716 + "type": "text",
  1717 + "analyzer": "armenian",
  1718 + "fields": {
  1719 + "keyword": {
  1720 + "type": "keyword",
  1721 + "normalizer": "lowercase"
  1722 + }
  1723 + }
1753 1724 },
1754   - "value_text": {
1755   - "type": "object",
1756   - "properties": {
1757   - "zh": {
1758   - "type": "text",
1759   - "analyzer": "index_ik",
1760   - "search_analyzer": "query_ik",
1761   - "fields": {
1762   - "keyword": {
1763   - "type": "keyword",
1764   - "normalizer": "lowercase"
1765   - }
1766   - }
1767   - },
1768   - "en": {
1769   - "type": "text",
1770   - "analyzer": "english",
1771   - "fields": {
1772   - "keyword": {
1773   - "type": "keyword",
1774   - "normalizer": "lowercase"
1775   - }
1776   - }
1777   - },
1778   - "ar": {
1779   - "type": "text",
1780   - "analyzer": "arabic",
1781   - "fields": {
1782   - "keyword": {
1783   - "type": "keyword",
1784   - "normalizer": "lowercase"
1785   - }
1786   - }
1787   - },
1788   - "hy": {
1789   - "type": "text",
1790   - "analyzer": "armenian",
1791   - "fields": {
1792   - "keyword": {
1793   - "type": "keyword",
1794   - "normalizer": "lowercase"
1795   - }
1796   - }
1797   - },
1798   - "eu": {
1799   - "type": "text",
1800   - "analyzer": "basque",
1801   - "fields": {
1802   - "keyword": {
1803   - "type": "keyword",
1804   - "normalizer": "lowercase"
1805   - }
1806   - }
1807   - },
1808   - "pt_br": {
1809   - "type": "text",
1810   - "analyzer": "brazilian",
1811   - "fields": {
1812   - "keyword": {
1813   - "type": "keyword",
1814   - "normalizer": "lowercase"
1815   - }
1816   - }
1817   - },
1818   - "bg": {
1819   - "type": "text",
1820   - "analyzer": "bulgarian",
1821   - "fields": {
1822   - "keyword": {
1823   - "type": "keyword",
1824   - "normalizer": "lowercase"
1825   - }
1826   - }
1827   - },
1828   - "ca": {
1829   - "type": "text",
1830   - "analyzer": "catalan",
1831   - "fields": {
1832   - "keyword": {
1833   - "type": "keyword",
1834   - "normalizer": "lowercase"
1835   - }
1836   - }
1837   - },
1838   - "cjk": {
1839   - "type": "text",
1840   - "analyzer": "cjk",
1841   - "fields": {
1842   - "keyword": {
1843   - "type": "keyword",
1844   - "normalizer": "lowercase"
1845   - }
1846   - }
1847   - },
1848   - "cs": {
1849   - "type": "text",
1850   - "analyzer": "czech",
1851   - "fields": {
1852   - "keyword": {
1853   - "type": "keyword",
1854   - "normalizer": "lowercase"
1855   - }
1856   - }
1857   - },
1858   - "da": {
1859   - "type": "text",
1860   - "analyzer": "danish",
1861   - "fields": {
1862   - "keyword": {
1863   - "type": "keyword",
1864   - "normalizer": "lowercase"
1865   - }
1866   - }
1867   - },
1868   - "nl": {
1869   - "type": "text",
1870   - "analyzer": "dutch",
1871   - "fields": {
1872   - "keyword": {
1873   - "type": "keyword",
1874   - "normalizer": "lowercase"
1875   - }
1876   - }
1877   - },
1878   - "fi": {
1879   - "type": "text",
1880   - "analyzer": "finnish",
1881   - "fields": {
1882   - "keyword": {
1883   - "type": "keyword",
1884   - "normalizer": "lowercase"
1885   - }
1886   - }
1887   - },
1888   - "fr": {
1889   - "type": "text",
1890   - "analyzer": "french",
1891   - "fields": {
1892   - "keyword": {
1893   - "type": "keyword",
1894   - "normalizer": "lowercase"
1895   - }
1896   - }
1897   - },
1898   - "gl": {
1899   - "type": "text",
1900   - "analyzer": "galician",
1901   - "fields": {
1902   - "keyword": {
1903   - "type": "keyword",
1904   - "normalizer": "lowercase"
1905   - }
1906   - }
1907   - },
1908   - "de": {
1909   - "type": "text",
1910   - "analyzer": "german",
1911   - "fields": {
1912   - "keyword": {
1913   - "type": "keyword",
1914   - "normalizer": "lowercase"
1915   - }
1916   - }
1917   - },
1918   - "el": {
1919   - "type": "text",
1920   - "analyzer": "greek",
1921   - "fields": {
1922   - "keyword": {
1923   - "type": "keyword",
1924   - "normalizer": "lowercase"
1925   - }
1926   - }
1927   - },
1928   - "hi": {
1929   - "type": "text",
1930   - "analyzer": "hindi",
1931   - "fields": {
1932   - "keyword": {
1933   - "type": "keyword",
1934   - "normalizer": "lowercase"
1935   - }
1936   - }
1937   - },
1938   - "hu": {
1939   - "type": "text",
1940   - "analyzer": "hungarian",
1941   - "fields": {
1942   - "keyword": {
1943   - "type": "keyword",
1944   - "normalizer": "lowercase"
1945   - }
1946   - }
1947   - },
1948   - "id": {
1949   - "type": "text",
1950   - "analyzer": "indonesian",
1951   - "fields": {
1952   - "keyword": {
1953   - "type": "keyword",
1954   - "normalizer": "lowercase"
1955   - }
1956   - }
1957   - },
1958   - "it": {
1959   - "type": "text",
1960   - "analyzer": "italian",
1961   - "fields": {
1962   - "keyword": {
1963   - "type": "keyword",
1964   - "normalizer": "lowercase"
1965   - }
1966   - }
1967   - },
1968   - "no": {
1969   - "type": "text",
1970   - "analyzer": "norwegian",
1971   - "fields": {
1972   - "keyword": {
1973   - "type": "keyword",
1974   - "normalizer": "lowercase"
1975   - }
1976   - }
1977   - },
1978   - "fa": {
1979   - "type": "text",
1980   - "analyzer": "persian",
1981   - "fields": {
1982   - "keyword": {
1983   - "type": "keyword",
1984   - "normalizer": "lowercase"
1985   - }
1986   - }
1987   - },
1988   - "pt": {
1989   - "type": "text",
1990   - "analyzer": "portuguese",
1991   - "fields": {
1992   - "keyword": {
1993   - "type": "keyword",
1994   - "normalizer": "lowercase"
1995   - }
1996   - }
1997   - },
1998   - "ro": {
1999   - "type": "text",
2000   - "analyzer": "romanian",
2001   - "fields": {
2002   - "keyword": {
2003   - "type": "keyword",
2004   - "normalizer": "lowercase"
2005   - }
2006   - }
2007   - },
2008   - "ru": {
2009   - "type": "text",
2010   - "analyzer": "russian",
2011   - "fields": {
2012   - "keyword": {
2013   - "type": "keyword",
2014   - "normalizer": "lowercase"
2015   - }
2016   - }
2017   - },
2018   - "es": {
2019   - "type": "text",
2020   - "analyzer": "spanish",
2021   - "fields": {
2022   - "keyword": {
2023   - "type": "keyword",
2024   - "normalizer": "lowercase"
2025   - }
2026   - }
2027   - },
2028   - "sv": {
2029   - "type": "text",
2030   - "analyzer": "swedish",
2031   - "fields": {
2032   - "keyword": {
2033   - "type": "keyword",
2034   - "normalizer": "lowercase"
2035   - }
2036   - }
2037   - },
2038   - "tr": {
  1725 + "eu": {
  1726 + "type": "text",
  1727 + "analyzer": "basque",
  1728 + "fields": {
  1729 + "keyword": {
  1730 + "type": "keyword",
  1731 + "normalizer": "lowercase"
  1732 + }
  1733 + }
  1734 + },
  1735 + "pt_br": {
  1736 + "type": "text",
  1737 + "analyzer": "brazilian",
  1738 + "fields": {
  1739 + "keyword": {
  1740 + "type": "keyword",
  1741 + "normalizer": "lowercase"
  1742 + }
  1743 + }
  1744 + },
  1745 + "bg": {
  1746 + "type": "text",
  1747 + "analyzer": "bulgarian",
  1748 + "fields": {
  1749 + "keyword": {
  1750 + "type": "keyword",
  1751 + "normalizer": "lowercase"
  1752 + }
  1753 + }
  1754 + },
  1755 + "ca": {
  1756 + "type": "text",
  1757 + "analyzer": "catalan",
  1758 + "fields": {
  1759 + "keyword": {
  1760 + "type": "keyword",
  1761 + "normalizer": "lowercase"
  1762 + }
  1763 + }
  1764 + },
  1765 + "cjk": {
  1766 + "type": "text",
  1767 + "analyzer": "cjk",
  1768 + "fields": {
  1769 + "keyword": {
  1770 + "type": "keyword",
  1771 + "normalizer": "lowercase"
  1772 + }
  1773 + }
  1774 + },
  1775 + "cs": {
  1776 + "type": "text",
  1777 + "analyzer": "czech",
  1778 + "fields": {
  1779 + "keyword": {
  1780 + "type": "keyword",
  1781 + "normalizer": "lowercase"
  1782 + }
  1783 + }
  1784 + },
  1785 + "da": {
  1786 + "type": "text",
  1787 + "analyzer": "danish",
  1788 + "fields": {
  1789 + "keyword": {
  1790 + "type": "keyword",
  1791 + "normalizer": "lowercase"
  1792 + }
  1793 + }
  1794 + },
  1795 + "nl": {
  1796 + "type": "text",
  1797 + "analyzer": "dutch",
  1798 + "fields": {
  1799 + "keyword": {
  1800 + "type": "keyword",
  1801 + "normalizer": "lowercase"
  1802 + }
  1803 + }
  1804 + },
  1805 + "fi": {
  1806 + "type": "text",
  1807 + "analyzer": "finnish",
  1808 + "fields": {
  1809 + "keyword": {
  1810 + "type": "keyword",
  1811 + "normalizer": "lowercase"
  1812 + }
  1813 + }
  1814 + },
  1815 + "fr": {
  1816 + "type": "text",
  1817 + "analyzer": "french",
  1818 + "fields": {
  1819 + "keyword": {
  1820 + "type": "keyword",
  1821 + "normalizer": "lowercase"
  1822 + }
  1823 + }
  1824 + },
  1825 + "gl": {
  1826 + "type": "text",
  1827 + "analyzer": "galician",
  1828 + "fields": {
  1829 + "keyword": {
  1830 + "type": "keyword",
  1831 + "normalizer": "lowercase"
  1832 + }
  1833 + }
  1834 + },
  1835 + "de": {
  1836 + "type": "text",
  1837 + "analyzer": "german",
  1838 + "fields": {
  1839 + "keyword": {
  1840 + "type": "keyword",
  1841 + "normalizer": "lowercase"
  1842 + }
  1843 + }
  1844 + },
  1845 + "el": {
  1846 + "type": "text",
  1847 + "analyzer": "greek",
  1848 + "fields": {
  1849 + "keyword": {
  1850 + "type": "keyword",
  1851 + "normalizer": "lowercase"
  1852 + }
  1853 + }
  1854 + },
  1855 + "hi": {
  1856 + "type": "text",
  1857 + "analyzer": "hindi",
  1858 + "fields": {
  1859 + "keyword": {
  1860 + "type": "keyword",
  1861 + "normalizer": "lowercase"
  1862 + }
  1863 + }
  1864 + },
  1865 + "hu": {
  1866 + "type": "text",
  1867 + "analyzer": "hungarian",
  1868 + "fields": {
  1869 + "keyword": {
  1870 + "type": "keyword",
  1871 + "normalizer": "lowercase"
  1872 + }
  1873 + }
  1874 + },
  1875 + "id": {
  1876 + "type": "text",
  1877 + "analyzer": "indonesian",
  1878 + "fields": {
  1879 + "keyword": {
  1880 + "type": "keyword",
  1881 + "normalizer": "lowercase"
  1882 + }
  1883 + }
  1884 + },
  1885 + "it": {
  1886 + "type": "text",
  1887 + "analyzer": "italian",
  1888 + "fields": {
  1889 + "keyword": {
  1890 + "type": "keyword",
  1891 + "normalizer": "lowercase"
  1892 + }
  1893 + }
  1894 + },
  1895 + "no": {
  1896 + "type": "text",
  1897 + "analyzer": "norwegian",
  1898 + "fields": {
  1899 + "keyword": {
  1900 + "type": "keyword",
  1901 + "normalizer": "lowercase"
  1902 + }
  1903 + }
  1904 + },
  1905 + "fa": {
  1906 + "type": "text",
  1907 + "analyzer": "persian",
  1908 + "fields": {
  1909 + "keyword": {
  1910 + "type": "keyword",
  1911 + "normalizer": "lowercase"
  1912 + }
  1913 + }
  1914 + },
  1915 + "pt": {
  1916 + "type": "text",
  1917 + "analyzer": "portuguese",
  1918 + "fields": {
  1919 + "keyword": {
  1920 + "type": "keyword",
  1921 + "normalizer": "lowercase"
  1922 + }
  1923 + }
  1924 + },
  1925 + "ro": {
  1926 + "type": "text",
  1927 + "analyzer": "romanian",
  1928 + "fields": {
  1929 + "keyword": {
  1930 + "type": "keyword",
  1931 + "normalizer": "lowercase"
  1932 + }
  1933 + }
  1934 + },
  1935 + "ru": {
  1936 + "type": "text",
  1937 + "analyzer": "russian",
  1938 + "fields": {
  1939 + "keyword": {
  1940 + "type": "keyword",
  1941 + "normalizer": "lowercase"
  1942 + }
  1943 + }
  1944 + },
  1945 + "es": {
  1946 + "type": "text",
  1947 + "analyzer": "spanish",
  1948 + "fields": {
  1949 + "keyword": {
  1950 + "type": "keyword",
  1951 + "normalizer": "lowercase"
  1952 + }
  1953 + }
  1954 + },
  1955 + "sv": {
  1956 + "type": "text",
  1957 + "analyzer": "swedish",
  1958 + "fields": {
  1959 + "keyword": {
  1960 + "type": "keyword",
  1961 + "normalizer": "lowercase"
  1962 + }
  1963 + }
  1964 + },
  1965 + "tr": {
  1966 + "type": "text",
  1967 + "analyzer": "turkish",
  1968 + "fields": {
  1969 + "keyword": {
  1970 + "type": "keyword",
  1971 + "normalizer": "lowercase"
  1972 + }
  1973 + }
  1974 + },
  1975 + "th": {
  1976 + "type": "text",
  1977 + "analyzer": "thai",
  1978 + "fields": {
  1979 + "keyword": {
  1980 + "type": "keyword",
  1981 + "normalizer": "lowercase"
  1982 + }
  1983 + }
  1984 + }
  1985 + }
  1986 + },
  1987 + "category_id": {
  1988 + "type": "keyword"
  1989 + },
  1990 + "category_name": {
  1991 + "type": "keyword"
  1992 + },
  1993 + "category_level": {
  1994 + "type": "integer"
  1995 + },
  1996 + "category1_name": {
  1997 + "type": "keyword"
  1998 + },
  1999 + "category2_name": {
  2000 + "type": "keyword"
  2001 + },
  2002 + "category3_name": {
  2003 + "type": "keyword"
  2004 + },
  2005 + "specifications": {
  2006 + "type": "nested",
  2007 + "properties": {
  2008 + "sku_id": {
  2009 + "type": "keyword"
  2010 + },
  2011 + "name": {
  2012 + "type": "keyword"
  2013 + },
  2014 + "value_keyword": {
  2015 + "type": "keyword"
  2016 + },
  2017 + "value_text": {
  2018 + "type": "object",
  2019 + "properties": {
  2020 + "zh": {
2039 2021 "type": "text",
2040   - "analyzer": "turkish",
  2022 + "analyzer": "index_ik",
  2023 + "search_analyzer": "query_ik",
2041 2024 "fields": {
2042 2025 "keyword": {
2043 2026 "type": "keyword",
... ... @@ -2045,9 +2028,9 @@
2045 2028 }
2046 2029 }
2047 2030 },
2048   - "th": {
  2031 + "en": {
2049 2032 "type": "text",
2050   - "analyzer": "thai",
  2033 + "analyzer": "english",
2051 2034 "fields": {
2052 2035 "keyword": {
2053 2036 "type": "keyword",
... ... @@ -2059,6 +2042,46 @@
2059 2042 }
2060 2043 }
2061 2044 },
  2045 + "qanchors": {
  2046 + "type": "object",
  2047 + "properties": {
  2048 + "zh": {
  2049 + "type": "text",
  2050 + "analyzer": "index_ik",
  2051 + "search_analyzer": "query_ik"
  2052 + },
  2053 + "en": {
  2054 + "type": "text",
  2055 + "analyzer": "english"
  2056 + }
  2057 + }
  2058 + },
  2059 + "enriched_tags": {
  2060 + "type": "object",
  2061 + "properties": {
  2062 + "zh": {
  2063 + "type": "text",
  2064 + "analyzer": "index_ik",
  2065 + "search_analyzer": "query_ik",
  2066 + "fields": {
  2067 + "keyword": {
  2068 + "type": "keyword",
  2069 + "normalizer": "lowercase"
  2070 + }
  2071 + }
  2072 + },
  2073 + "en": {
  2074 + "type": "text",
  2075 + "analyzer": "english",
  2076 + "fields": {
  2077 + "keyword": {
  2078 + "type": "keyword",
  2079 + "normalizer": "lowercase"
  2080 + }
  2081 + }
  2082 + }
  2083 + }
  2084 + },
2062 2085 "enriched_attributes": {
2063 2086 "type": "nested",
2064 2087 "properties": {
... ...